Merge "Share the runtime arena for faster compile times."
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 0906753..6b0e6ff 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -94,6 +94,7 @@
 	jni/quick/x86_64/calling_convention_x86_64.cc \
 	jni/quick/calling_convention.cc \
 	jni/quick/jni_compiler.cc \
+	optimizing/boolean_simplifier.cc \
 	optimizing/builder.cc \
 	optimizing/bounds_check_elimination.cc \
 	optimizing/code_generator.cc \
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index a8ec6c0..49b15fe 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -449,7 +449,7 @@
 }
 
 bool Arm64Mir2Lir::GenInlinedRound(CallInfo* info, bool is_double) {
-  int32_t encoded_imm = EncodeImmSingle(bit_cast<float, uint32_t>(0.5f));
+  int32_t encoded_imm = EncodeImmSingle(bit_cast<uint32_t, float>(0.5f));
   A64Opcode wide = (is_double) ? WIDE(0) : UNWIDE(0);
   RegLocation rl_src = info->args[0];
   RegLocation rl_dest = (is_double) ? InlineTargetWide(info) : InlineTarget(info);
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index b80fd74..2bcaaca 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1874,8 +1874,8 @@
     int32_t divisor = mir_graph_->ConstantValue(rl_src2);
     if (CanDivideByReciprocalMultiplyFloat(divisor)) {
       // Generate multiply by reciprocal instead of div.
-      float recip = 1.0f/bit_cast<int32_t, float>(divisor);
-      GenMultiplyByConstantFloat(rl_dest, rl_src1, bit_cast<float, int32_t>(recip));
+      float recip = 1.0f/bit_cast<float, int32_t>(divisor);
+      GenMultiplyByConstantFloat(rl_dest, rl_src1, bit_cast<int32_t, float>(recip));
       return true;
     }
   } else {
@@ -1883,7 +1883,7 @@
     if (CanDivideByReciprocalMultiplyDouble(divisor)) {
       // Generate multiply by reciprocal instead of div.
       double recip = 1.0/bit_cast<double, int64_t>(divisor);
-      GenMultiplyByConstantDouble(rl_dest, rl_src1, bit_cast<double, int64_t>(recip));
+      GenMultiplyByConstantDouble(rl_dest, rl_src1, bit_cast<int64_t, double>(recip));
       return true;
     }
   }
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index f513ea8..70bfb81 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -1510,25 +1510,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = bit_cast<jfloat, jint>(f1);
+  jint i11 = bit_cast<jint, jfloat>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = bit_cast<jfloat, jint>(f2);
+  jint i12 = bit_cast<jint, jfloat>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = bit_cast<jfloat, jint>(f3);
+  jint i13 = bit_cast<jint, jfloat>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = bit_cast<jfloat, jint>(f4);
+  jint i14 = bit_cast<jint, jfloat>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = bit_cast<jfloat, jint>(f5);
+  jint i15 = bit_cast<jint, jfloat>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = bit_cast<jfloat, jint>(f6);
+  jint i16 = bit_cast<jint, jfloat>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = bit_cast<jfloat, jint>(f7);
+  jint i17 = bit_cast<jint, jfloat>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = bit_cast<jfloat, jint>(f8);
+  jint i18 = bit_cast<jint, jfloat>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = bit_cast<jfloat, jint>(f9);
+  jint i19 = bit_cast<jint, jfloat>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = bit_cast<jfloat, jint>(f10);
+  jint i20 = bit_cast<jint, jfloat>(f10);
   EXPECT_EQ(i20, 20);
 }
 
@@ -1547,16 +1547,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = bit_cast<jint, jfloat>(11);
-  jfloat f2 = bit_cast<jint, jfloat>(12);
-  jfloat f3 = bit_cast<jint, jfloat>(13);
-  jfloat f4 = bit_cast<jint, jfloat>(14);
-  jfloat f5 = bit_cast<jint, jfloat>(15);
-  jfloat f6 = bit_cast<jint, jfloat>(16);
-  jfloat f7 = bit_cast<jint, jfloat>(17);
-  jfloat f8 = bit_cast<jint, jfloat>(18);
-  jfloat f9 = bit_cast<jint, jfloat>(19);
-  jfloat f10 = bit_cast<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jfloat, jint>(11);
+  jfloat f2 = bit_cast<jfloat, jint>(12);
+  jfloat f3 = bit_cast<jfloat, jint>(13);
+  jfloat f4 = bit_cast<jfloat, jint>(14);
+  jfloat f5 = bit_cast<jfloat, jint>(15);
+  jfloat f6 = bit_cast<jfloat, jint>(16);
+  jfloat f7 = bit_cast<jfloat, jint>(17);
+  jfloat f8 = bit_cast<jfloat, jint>(18);
+  jfloat f9 = bit_cast<jfloat, jint>(19);
+  jfloat f10 = bit_cast<jfloat, jint>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, f1, f2,
                              f3, f4, f5, f6, f7, f8, f9, f10);
@@ -1580,25 +1580,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = bit_cast<jfloat, jint>(f1);
+  jint i11 = bit_cast<jint, jfloat>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = bit_cast<jfloat, jint>(f2);
+  jint i12 = bit_cast<jint, jfloat>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = bit_cast<jfloat, jint>(f3);
+  jint i13 = bit_cast<jint, jfloat>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = bit_cast<jfloat, jint>(f4);
+  jint i14 = bit_cast<jint, jfloat>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = bit_cast<jfloat, jint>(f5);
+  jint i15 = bit_cast<jint, jfloat>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = bit_cast<jfloat, jint>(f6);
+  jint i16 = bit_cast<jint, jfloat>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = bit_cast<jfloat, jint>(f7);
+  jint i17 = bit_cast<jint, jfloat>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = bit_cast<jfloat, jint>(f8);
+  jint i18 = bit_cast<jint, jfloat>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = bit_cast<jfloat, jint>(f9);
+  jint i19 = bit_cast<jint, jfloat>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = bit_cast<jfloat, jint>(f10);
+  jint i20 = bit_cast<jint, jfloat>(f10);
   EXPECT_EQ(i20, 20);
 }
 
@@ -1617,16 +1617,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = bit_cast<jint, jfloat>(11);
-  jfloat f2 = bit_cast<jint, jfloat>(12);
-  jfloat f3 = bit_cast<jint, jfloat>(13);
-  jfloat f4 = bit_cast<jint, jfloat>(14);
-  jfloat f5 = bit_cast<jint, jfloat>(15);
-  jfloat f6 = bit_cast<jint, jfloat>(16);
-  jfloat f7 = bit_cast<jint, jfloat>(17);
-  jfloat f8 = bit_cast<jint, jfloat>(18);
-  jfloat f9 = bit_cast<jint, jfloat>(19);
-  jfloat f10 = bit_cast<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jfloat, jint>(11);
+  jfloat f2 = bit_cast<jfloat, jint>(12);
+  jfloat f3 = bit_cast<jfloat, jint>(13);
+  jfloat f4 = bit_cast<jfloat, jint>(14);
+  jfloat f5 = bit_cast<jfloat, jint>(15);
+  jfloat f6 = bit_cast<jfloat, jint>(16);
+  jfloat f7 = bit_cast<jfloat, jint>(17);
+  jfloat f8 = bit_cast<jfloat, jint>(18);
+  jfloat f9 = bit_cast<jfloat, jint>(19);
+  jfloat f10 = bit_cast<jfloat, jint>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, i1, i2, i3,
                              i4, i5, i6, i7, i8, i9, i10);
@@ -1649,25 +1649,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = bit_cast<jfloat, jint>(f1);
+  jint i11 = bit_cast<jint, jfloat>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = bit_cast<jfloat, jint>(f2);
+  jint i12 = bit_cast<jint, jfloat>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = bit_cast<jfloat, jint>(f3);
+  jint i13 = bit_cast<jint, jfloat>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = bit_cast<jfloat, jint>(f4);
+  jint i14 = bit_cast<jint, jfloat>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = bit_cast<jfloat, jint>(f5);
+  jint i15 = bit_cast<jint, jfloat>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = bit_cast<jfloat, jint>(f6);
+  jint i16 = bit_cast<jint, jfloat>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = bit_cast<jfloat, jint>(f7);
+  jint i17 = bit_cast<jint, jfloat>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = bit_cast<jfloat, jint>(f8);
+  jint i18 = bit_cast<jint, jfloat>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = bit_cast<jfloat, jint>(f9);
+  jint i19 = bit_cast<jint, jfloat>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = bit_cast<jfloat, jint>(f10);
+  jint i20 = bit_cast<jint, jfloat>(f10);
   EXPECT_EQ(i20, 20);
 }
 
@@ -1686,16 +1686,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = bit_cast<jint, jfloat>(11);
-  jfloat f2 = bit_cast<jint, jfloat>(12);
-  jfloat f3 = bit_cast<jint, jfloat>(13);
-  jfloat f4 = bit_cast<jint, jfloat>(14);
-  jfloat f5 = bit_cast<jint, jfloat>(15);
-  jfloat f6 = bit_cast<jint, jfloat>(16);
-  jfloat f7 = bit_cast<jint, jfloat>(17);
-  jfloat f8 = bit_cast<jint, jfloat>(18);
-  jfloat f9 = bit_cast<jint, jfloat>(19);
-  jfloat f10 = bit_cast<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jfloat, jint>(11);
+  jfloat f2 = bit_cast<jfloat, jint>(12);
+  jfloat f3 = bit_cast<jfloat, jint>(13);
+  jfloat f4 = bit_cast<jfloat, jint>(14);
+  jfloat f5 = bit_cast<jfloat, jint>(15);
+  jfloat f6 = bit_cast<jfloat, jint>(16);
+  jfloat f7 = bit_cast<jfloat, jint>(17);
+  jfloat f8 = bit_cast<jfloat, jint>(18);
+  jfloat f9 = bit_cast<jfloat, jint>(19);
+  jfloat f10 = bit_cast<jfloat, jint>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, i1, f1, i2, f2, i3, f3, i4, f4, i5, f5, i6, f6, i7,
                              f7, i8, f8, i9, f9, i10, f10);
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
new file mode 100644
index 0000000..ecf9fa2
--- /dev/null
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "boolean_simplifier.h"
+
+namespace art {
+
+static bool EndsWithAnIf(HBasicBlock* block) {
+  return block->GetLastInstruction()->IsIf();
+}
+
+static bool HasSinglePhi(HBasicBlock* block) {
+  return !block->GetPhis().IsEmpty()
+      && block->GetFirstPhi()->GetNext() == nullptr;
+}
+
+// Returns true if 'block1' and 'block2' are empty, merge into the same single
+// successor and the successor can only be reached from them.
+static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
+  if (!block1->IsSingleGoto() || !block2->IsSingleGoto()) return false;
+  HBasicBlock* succ1 = block1->GetSuccessors().Get(0);
+  HBasicBlock* succ2 = block2->GetSuccessors().Get(0);
+  return succ1 == succ2 && succ1->GetPredecessors().Size() == 2u;
+}
+
+// Returns true if the outcome of the branching matches the boolean value of
+// the branching condition.
+static bool PreservesCondition(HInstruction* input_true, HInstruction* input_false) {
+  return input_true->IsIntConstant() && input_true->AsIntConstant()->GetValue() == 1
+         && input_false->IsIntConstant() && input_false->AsIntConstant()->GetValue() == 0;
+}
+
+// Returns true if the outcome of the branching is exactly opposite of the
+// boolean value of the branching condition.
+static bool NegatesCondition(HInstruction* input_true, HInstruction* input_false) {
+  return input_true->IsIntConstant() && input_true->AsIntConstant()->GetValue() == 0
+         && input_false->IsIntConstant() && input_false->AsIntConstant()->GetValue() == 1;
+}
+
+// Returns an instruction with the opposite boolean value from 'cond'.
+static HInstruction* GetOppositeCondition(HInstruction* cond) {
+  HGraph* graph = cond->GetBlock()->GetGraph();
+  ArenaAllocator* allocator = graph->GetArena();
+
+  if (cond->IsCondition()) {
+    HInstruction* lhs = cond->InputAt(0);
+    HInstruction* rhs = cond->InputAt(1);
+    if (cond->IsEqual()) {
+      return new (allocator) HNotEqual(lhs, rhs);
+    } else if (cond->IsNotEqual()) {
+      return new (allocator) HEqual(lhs, rhs);
+    } else if (cond->IsLessThan()) {
+      return new (allocator) HGreaterThanOrEqual(lhs, rhs);
+    } else if (cond->IsLessThanOrEqual()) {
+      return new (allocator) HGreaterThan(lhs, rhs);
+    } else if (cond->IsGreaterThan()) {
+      return new (allocator) HLessThanOrEqual(lhs, rhs);
+    } else if (cond->IsGreaterThanOrEqual()) {
+      return new (allocator) HLessThan(lhs, rhs);
+    }
+  } else if (cond->IsIntConstant()) {
+    int32_t value = cond->AsIntConstant()->GetValue();
+    if (value == 0) {
+      return graph->GetIntConstant1();
+    } else {
+      DCHECK_EQ(value, 1);
+      return graph->GetIntConstant0();
+    }
+  }
+
+  LOG(FATAL) << "Instruction " << cond->DebugName() << " used as a condition";
+  UNREACHABLE();
+}
+
+void HBooleanSimplifier::Run() {
+  // Iterate in post order in the unlikely case that removing one occurrence of
+  // the pattern empties a branch block of another occurrence. Otherwise the
+  // order does not matter.
+  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (!EndsWithAnIf(block)) continue;
+
+    // Find elements of the pattern.
+    HIf* if_instruction = block->GetLastInstruction()->AsIf();
+    HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
+    HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
+    if (!BlocksDoMergeTogether(true_block, false_block)) {
+      continue;
+    }
+    HBasicBlock* merge_block = true_block->GetSuccessors().Get(0);
+    if (!HasSinglePhi(merge_block)) {
+      continue;
+    }
+    HPhi* phi = merge_block->GetFirstPhi()->AsPhi();
+    HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block));
+    HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block));
+
+    // Check if the selection negates/preserves the value of the condition and
+    // if so, generate a suitable replacement instruction.
+    HInstruction* if_condition = if_instruction->InputAt(0);
+    HInstruction* replacement;
+    if (NegatesCondition(true_value, false_value)) {
+      replacement = GetOppositeCondition(if_condition);
+      if (replacement->GetBlock() == nullptr) {
+        block->InsertInstructionBefore(replacement, if_instruction);
+      }
+    } else if (PreservesCondition(true_value, false_value)) {
+      replacement = if_condition;
+    } else {
+      continue;
+    }
+
+    // Replace the selection outcome with the new instruction.
+    phi->ReplaceWith(replacement);
+    merge_block->RemovePhi(phi);
+
+    // Link the start/end blocks and remove empty branches.
+    graph_->MergeEmptyBranches(block, merge_block);
+
+    // Remove the original condition if it is now unused.
+    if (!if_condition->HasUses()) {
+      if_condition->GetBlock()->RemoveInstruction(if_condition);
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/boolean_simplifier.h b/compiler/optimizing/boolean_simplifier.h
new file mode 100644
index 0000000..9fa9c5a
--- /dev/null
+++ b/compiler/optimizing/boolean_simplifier.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This optimization recognizes a common pattern where a boolean value is
+// either casted to an integer or negated by selecting from zero/one integer
+// constants with an If statement. Because boolean values are internally
+// represented as zero/one, we can safely replace the pattern with a suitable
+// condition instruction.
+
+// Example: Negating a boolean value
+//     B1:
+//       z1   ParameterValue
+//       i2   IntConstant 0
+//       i3   IntConstant 1
+//       v4   Goto B2
+//     B2:
+//       z5   NotEquals [ z1 i2 ]
+//       v6   If [ z5 ] then B3 else B4
+//     B3:
+//       v7   Goto B5
+//     B4:
+//       v8   Goto B5
+//     B5:
+//       i9   Phi [ i3 i2 ]
+//       v10  Return [ i9 ]
+// turns into
+//     B1:
+//       z1   ParameterValue
+//       i2   IntConstant 0
+//       v4   Goto B2
+//     B2:
+//       z11  Equals [ z1 i2 ]
+//       v10  Return [ z11 ]
+//     B3, B4, B5: removed
+
+// Note: in order to recognize empty blocks, this optimization must be run
+// after the instruction simplifier has removed redundant suspend checks.
+
+#ifndef ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
+#define ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
+
+#include "optimization.h"
+
+namespace art {
+
+class HBooleanSimplifier : public HOptimization {
+ public:
+  explicit HBooleanSimplifier(HGraph* graph)
+    : HOptimization(graph, true, kBooleanSimplifierPassName) {}
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kBooleanSimplifierPassName = "boolean_simplifier";
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HBooleanSimplifier);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index cbb41b1..a21c311 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -616,8 +616,8 @@
     DCHECK((optimized_invoke_type == invoke_type) || (optimized_invoke_type != kDirect)
            || compiler_driver_->GetCompilerOptions().GetCompilePic());
     bool is_recursive =
-        (target_method.dex_method_index == dex_compilation_unit_->GetDexMethodIndex());
-    DCHECK(!is_recursive || (target_method.dex_file == dex_compilation_unit_->GetDexFile()));
+        (target_method.dex_method_index == outer_compilation_unit_->GetDexMethodIndex());
+    DCHECK(!is_recursive || (target_method.dex_file == outer_compilation_unit_->GetDexFile()));
     invoke = new (arena_) HInvokeStaticOrDirect(
         arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index,
         is_recursive, optimized_invoke_type);
@@ -711,7 +711,7 @@
   uint16_t field_index = instruction.VRegB_21c();
 
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<5> hs(soa.Self());
+  StackHandleScope<4> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
       dex_compilation_unit_->GetClassLinker()->FindDexCache(*dex_compilation_unit_->GetDexFile())));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
@@ -724,10 +724,8 @@
     return false;
   }
 
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(*outer_compilation_unit_->GetDexFile())));
   Handle<mirror::Class> referrer_class(hs.NewHandle(compiler_driver_->ResolveCompilingMethodsClass(
-      soa, outer_dex_cache, class_loader, outer_compilation_unit_)));
+      soa, dex_cache, class_loader, outer_compilation_unit_)));
 
   // The index at which the field's class is stored in the DexCache's type array.
   uint32_t storage_index;
@@ -740,7 +738,7 @@
 
   // TODO: find out why this check is needed.
   bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
-      *dex_compilation_unit_->GetDexFile(), storage_index);
+      *outer_compilation_unit_->GetDexFile(), storage_index);
   bool is_initialized = resolved_field->GetDeclaringClass()->IsInitialized() && is_in_dex_cache;
   bool is_referrer_class = (referrer_class.Get() == resolved_field->GetDeclaringClass());
 
@@ -2060,31 +2058,13 @@
   return true;
 }  // NOLINT(readability/fn_size)
 
-HIntConstant* HGraphBuilder::GetIntConstant0() {
-  if (constant0_ != nullptr) {
-    return constant0_;
-  }
-  constant0_ = new(arena_) HIntConstant(0);
-  entry_block_->AddInstruction(constant0_);
-  return constant0_;
-}
-
-HIntConstant* HGraphBuilder::GetIntConstant1() {
-  if (constant1_ != nullptr) {
-    return constant1_;
-  }
-  constant1_ = new(arena_) HIntConstant(1);
-  entry_block_->AddInstruction(constant1_);
-  return constant1_;
-}
-
 HIntConstant* HGraphBuilder::GetIntConstant(int32_t constant) {
   switch (constant) {
-    case 0: return GetIntConstant0();
-    case 1: return GetIntConstant1();
+    case 0: return graph_->GetIntConstant0();
+    case 1: return graph_->GetIntConstant1();
     default: {
       HIntConstant* instruction = new (arena_) HIntConstant(constant);
-      entry_block_->AddInstruction(instruction);
+      graph_->AddConstant(instruction);
       return instruction;
     }
   }
@@ -2092,7 +2072,7 @@
 
 HLongConstant* HGraphBuilder::GetLongConstant(int64_t constant) {
   HLongConstant* instruction = new (arena_) HLongConstant(constant);
-  entry_block_->AddInstruction(instruction);
+  graph_->AddConstant(instruction);
   return instruction;
 }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 96196de..c70170b 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -47,8 +47,6 @@
         exit_block_(nullptr),
         current_block_(nullptr),
         graph_(graph),
-        constant0_(nullptr),
-        constant1_(nullptr),
         dex_file_(dex_file),
         dex_compilation_unit_(dex_compilation_unit),
         compiler_driver_(driver),
@@ -67,8 +65,6 @@
         exit_block_(nullptr),
         current_block_(nullptr),
         graph_(graph),
-        constant0_(nullptr),
-        constant1_(nullptr),
         dex_file_(nullptr),
         dex_compilation_unit_(nullptr),
         compiler_driver_(nullptr),
@@ -100,8 +96,6 @@
   void MaybeUpdateCurrentBlock(size_t index);
   HBasicBlock* FindBlockStartingAt(int32_t index) const;
 
-  HIntConstant* GetIntConstant0();
-  HIntConstant* GetIntConstant1();
   HIntConstant* GetIntConstant(int32_t constant);
   HLongConstant* GetLongConstant(int64_t constant);
   void InitializeLocals(uint16_t count);
@@ -253,9 +247,6 @@
   HBasicBlock* current_block_;
   HGraph* const graph_;
 
-  HIntConstant* constant0_;
-  HIntConstant* constant1_;
-
   // The dex file where the method being compiled is.
   const DexFile* const dex_file_;
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 561dcb7..bd6e943 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -40,16 +40,6 @@
   return mirror::ObjectArray<mirror::Object>::OffsetOfElement(index).SizeValue();
 }
 
-static bool IsSingleGoto(HBasicBlock* block) {
-  HLoopInformation* loop_info = block->GetLoopInformation();
-  // TODO: Remove the null check b/19084197.
-  return (block->GetFirstInstruction() != nullptr)
-      && (block->GetFirstInstruction() == block->GetLastInstruction())
-      && block->GetLastInstruction()->IsGoto()
-      // Back edges generate the suspend check.
-      && (loop_info == nullptr || !loop_info->IsBackEdge(block));
-}
-
 void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
   Initialize();
   if (!is_leaf) {
@@ -74,7 +64,7 @@
 HBasicBlock* CodeGenerator::GetNextBlockToEmit() const {
   for (size_t i = current_block_index_ + 1; i < block_order_->Size(); ++i) {
     HBasicBlock* block = block_order_->Get(i);
-    if (!IsSingleGoto(block)) {
+    if (!block->IsSingleGoto()) {
       return block;
     }
   }
@@ -82,7 +72,7 @@
 }
 
 HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const {
-  while (IsSingleGoto(block)) {
+  while (block->IsSingleGoto()) {
     block = block->GetSuccessors().Get(0);
   }
   return block;
@@ -97,7 +87,7 @@
     // Don't generate code for an empty block. Its predecessors will branch to its successor
     // directly. Also, the label of that block will not be emitted, so this helps catch
     // errors where we reference that label.
-    if (IsSingleGoto(block)) continue;
+    if (block->IsSingleGoto()) continue;
     Bind(block);
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
@@ -628,7 +618,7 @@
               ++i, DexRegisterLocation::Kind::kConstant, High32Bits(value));
           DCHECK_LT(i, environment_size);
         } else if (current->IsDoubleConstant()) {
-          int64_t value = bit_cast<double, int64_t>(current->AsDoubleConstant()->GetValue());
+          int64_t value = bit_cast<int64_t, double>(current->AsDoubleConstant()->GetValue());
           stack_map_stream_.AddDexRegisterEntry(
               i, DexRegisterLocation::Kind::kConstant, Low32Bits(value));
           stack_map_stream_.AddDexRegisterEntry(
@@ -641,7 +631,7 @@
           stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
         } else {
           DCHECK(current->IsFloatConstant()) << current->DebugName();
-          int32_t value = bit_cast<float, int32_t>(current->AsFloatConstant()->GetValue());
+          int32_t value = bit_cast<int32_t, float>(current->AsFloatConstant()->GetValue());
           stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, value);
         }
         break;
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ecaa6f0..07ca6b1 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -271,7 +271,7 @@
       return 0;
     } else {
       DCHECK(constant->IsFloatConstant());
-      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
     }
   }
 
@@ -281,12 +281,12 @@
     } else if (constant->IsNullConstant()) {
       return 0;
     } else if (constant->IsFloatConstant()) {
-      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
     } else if (constant->IsLongConstant()) {
       return constant->AsLongConstant()->GetValue();
     } else {
       DCHECK(constant->IsDoubleConstant());
-      return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
     }
   }
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 0a069a7..d783903 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -883,7 +883,7 @@
   HInstruction* previous = got->GetPrevious();
 
   HLoopInformation* info = block->GetLoopInformation();
-  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
@@ -1388,9 +1388,14 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
 
+  // Java language does not allow treating boolean as an integral type but our
+  // bit representation makes it safe.
+
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1407,6 +1412,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1451,6 +1458,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1487,6 +1496,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1503,6 +1514,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1536,6 +1549,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1582,6 +1597,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1597,6 +1614,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1654,6 +1673,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1692,6 +1713,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1707,6 +1730,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1773,6 +1798,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 99283a0..9455a91 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1621,7 +1621,7 @@
   HInstruction* previous = got->GetPrevious();
   HLoopInformation* info = block->GetLoopInformation();
 
-  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 02b9b32..0a7d3fe 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -680,7 +680,7 @@
         value = constant->AsLongConstant()->GetValue();
       } else {
         DCHECK(constant->IsDoubleConstant());
-        value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+        value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
       }
       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value)));
@@ -792,7 +792,7 @@
   HInstruction* previous = got->GetPrevious();
 
   HLoopInformation* info = block->GetLoopInformation();
-  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
@@ -1370,9 +1370,14 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
 
+  // Java language does not allow treating boolean as an integral type but our
+  // bit representation makes it safe.
+
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1391,6 +1396,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1435,6 +1442,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1464,6 +1473,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1480,6 +1491,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1511,6 +1524,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1556,6 +1571,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1577,6 +1594,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1672,6 +1691,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1703,6 +1724,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1726,6 +1749,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1783,6 +1808,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -3665,7 +3692,7 @@
       }
     } else if (constant->IsFloatConstant()) {
       float fp_value = constant->AsFloatConstant()->GetValue();
-      int32_t value = bit_cast<float, int32_t>(fp_value);
+      int32_t value = bit_cast<int32_t, float>(fp_value);
       Immediate imm(value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
@@ -3699,7 +3726,7 @@
     } else {
       DCHECK(constant->IsDoubleConstant());
       double dbl_value = constant->AsDoubleConstant()->GetValue();
-      int64_t value = bit_cast<double, int64_t>(dbl_value);
+      int64_t value = bit_cast<int64_t, double>(dbl_value);
       int32_t low_value = Low32Bits(value);
       int32_t high_value = High32Bits(value);
       Immediate low(low_value);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index d09c8f8..bff8fc9 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -625,7 +625,7 @@
       HConstant* constant = source.GetConstant();
       int64_t value = constant->AsLongConstant()->GetValue();
       if (constant->IsDoubleConstant()) {
-        value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+        value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
       } else {
         DCHECK(constant->IsLongConstant());
         value = constant->AsLongConstant()->GetValue();
@@ -729,7 +729,7 @@
   HInstruction* previous = got->GetPrevious();
 
   HLoopInformation* info = block->GetLoopInformation();
-  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
@@ -1409,9 +1409,15 @@
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   DCHECK_NE(result_type, input_type);
+
+  // Java language does not allow treating boolean as an integral type but our
+  // bit representation makes it safe.
+
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1428,6 +1434,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1472,6 +1480,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1505,6 +1515,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1521,6 +1533,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1550,6 +1564,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1593,6 +1609,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1617,6 +1635,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1715,6 +1735,8 @@
     case Primitive::kPrimLong:
       switch (input_type) {
         DCHECK(out.IsRegister());
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1782,6 +1804,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1806,6 +1830,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1832,6 +1858,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -3344,7 +3372,7 @@
       }
     } else if (constant->IsFloatConstant()) {
       float fp_value = constant->AsFloatConstant()->GetValue();
-      int32_t value = bit_cast<float, int32_t>(fp_value);
+      int32_t value = bit_cast<int32_t, float>(fp_value);
       Immediate imm(value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
@@ -3362,7 +3390,7 @@
     } else {
       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
       double fp_value =  constant->AsDoubleConstant()->GetValue();
-      int64_t value = bit_cast<double, int64_t>(fp_value);
+      int64_t value = bit_cast<int64_t, double>(fp_value);
       Immediate imm(value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 76b9f4f..09a3ae4 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -227,13 +227,13 @@
   } else {
     HLoopInformation* loop_information = loop_header->GetLoopInformation();
     HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0);
-    if (loop_information->IsBackEdge(first_predecessor)) {
+    if (loop_information->IsBackEdge(*first_predecessor)) {
       AddError(StringPrintf(
           "First predecessor of loop header %d is a back edge.",
           id));
     }
     HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1);
-    if (!loop_information->IsBackEdge(second_predecessor)) {
+    if (!loop_information->IsBackEdge(*second_predecessor)) {
       AddError(StringPrintf(
           "Second predecessor of loop header %d is not a back edge.",
           id));
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 82d6357..968fe3e 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -85,9 +85,11 @@
     return false;
   }
 
-  bool can_use_dex_cache = true;
   if (resolved_method->GetDexFile()->GetLocation().compare(outer_dex_file.GetLocation()) != 0) {
-    can_use_dex_cache = false;
+    VLOG(compiler) << "Did not inline "
+                   << PrettyMethod(method_index, outer_dex_file)
+                   << " because it is in a different dex file";
+    return false;
   }
 
   const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
@@ -122,7 +124,7 @@
     return false;
   }
 
-  if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, can_use_dex_cache)) {
+  if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index)) {
     resolved_method->SetShouldNotInline();
     return false;
   }
@@ -134,8 +136,7 @@
 
 bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
                                  HInvoke* invoke_instruction,
-                                 uint32_t method_index,
-                                 bool can_use_dex_cache) const {
+                                 uint32_t method_index) const {
   ScopedObjectAccess soa(Thread::Current());
   const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
   const DexFile& outer_dex_file = *outer_compilation_unit_.GetDexFile();
@@ -144,10 +145,10 @@
     nullptr,
     outer_compilation_unit_.GetClassLoader(),
     outer_compilation_unit_.GetClassLinker(),
-    *resolved_method->GetDexFile(),
+    outer_dex_file,
     code_item,
     resolved_method->GetDeclaringClass()->GetDexClassDefIndex(),
-    resolved_method->GetDexMethodIndex(),
+    method_index,
     resolved_method->GetAccessFlags(),
     nullptr);
 
@@ -158,7 +159,7 @@
   HGraphBuilder builder(callee_graph,
                         &dex_compilation_unit,
                         &outer_compilation_unit_,
-                        resolved_method->GetDexFile(),
+                        &outer_dex_file,
                         compiler_driver_,
                         &inline_stats);
 
@@ -199,7 +200,7 @@
 
   if (depth_ + 1 < kDepthLimit) {
     HInliner inliner(
-        callee_graph, dex_compilation_unit, compiler_driver_, stats_, depth_ + 1);
+        callee_graph, outer_compilation_unit_, compiler_driver_, stats_, depth_ + 1);
     inliner.Run();
   }
 
@@ -234,13 +235,6 @@
                        << " needs an environment";
         return false;
       }
-
-      if (!can_use_dex_cache && current->NeedsDexCache()) {
-        VLOG(compiler) << "Method " << PrettyMethod(method_index, outer_dex_file)
-                       << " could not be inlined because " << current->DebugName()
-                       << " it is in a different dex file and requires access to the dex cache";
-        return false;
-      }
     }
   }
 
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 4b7e2ff..1251977 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -48,8 +48,7 @@
   bool TryInline(HInvoke* invoke_instruction, uint32_t method_index, InvokeType invoke_type) const;
   bool TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
                          HInvoke* invoke_instruction,
-                         uint32_t method_index,
-                         bool can_use_dex_cache) const;
+                         uint32_t method_index) const;
 
   const DexCompilationUnit& outer_compilation_unit_;
   CompilerDriver* const compiler_driver_;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index a90ebce..6009cb5 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -185,7 +185,7 @@
   if (successor->IsLoopHeader()) {
     // If we split at a back edge boundary, make the new block the back edge.
     HLoopInformation* info = successor->GetLoopInformation();
-    if (info->IsBackEdge(block)) {
+    if (info->IsBackEdge(*block)) {
       info->RemoveBackEdge(block);
       info->AddBackEdge(new_block);
     }
@@ -287,19 +287,49 @@
   return true;
 }
 
+void HGraph::AddConstant(HConstant* instruction) {
+  HInstruction* last_instruction = entry_block_->GetLastInstruction();
+  if (last_instruction == nullptr || !last_instruction->IsControlFlow()) {
+    // Called from the builder. Insert at the end of the block.
+    entry_block_->AddInstruction(instruction);
+  } else {
+    // Entry block ends with control-flow. Insert before the last instruction.
+    entry_block_->InsertInstructionBefore(instruction, last_instruction);
+  }
+}
+
 HNullConstant* HGraph::GetNullConstant() {
   if (cached_null_constant_ == nullptr) {
     cached_null_constant_ = new (arena_) HNullConstant();
-    entry_block_->InsertInstructionBefore(cached_null_constant_,
-                                          entry_block_->GetLastInstruction());
+    AddConstant(cached_null_constant_);
   }
   return cached_null_constant_;
 }
 
+HIntConstant* HGraph::GetIntConstant0() {
+  if (cached_int_constant0_ == nullptr) {
+    cached_int_constant0_ = new (arena_) HIntConstant(0);
+    AddConstant(cached_int_constant0_);
+  }
+  return cached_int_constant0_;
+}
+
+HIntConstant* HGraph::GetIntConstant1() {
+  if (cached_int_constant1_ == nullptr) {
+    cached_int_constant1_ = new (arena_) HIntConstant(1);
+    AddConstant(cached_int_constant1_);
+  }
+  return cached_int_constant1_;
+}
+
 void HLoopInformation::Add(HBasicBlock* block) {
   blocks_.SetBit(block->GetBlockId());
 }
 
+void HLoopInformation::Remove(HBasicBlock* block) {
+  blocks_.ClearBit(block->GetBlockId());
+}
+
 void HLoopInformation::PopulateRecursive(HBasicBlock* block) {
   if (blocks_.IsBitSet(block->GetBlockId())) {
     return;
@@ -621,7 +651,10 @@
 void HGraphVisitor::VisitInsertionOrder() {
   const GrowableArray<HBasicBlock*>& blocks = graph_->GetBlocks();
   for (size_t i = 0 ; i < blocks.Size(); i++) {
-    VisitBasicBlock(blocks.Get(i));
+    HBasicBlock* block = blocks.Get(i);
+    if (block != nullptr) {
+      VisitBasicBlock(block);
+    }
   }
 }
 
@@ -788,6 +821,17 @@
   return new_block;
 }
 
+bool HBasicBlock::IsSingleGoto() const {
+  HLoopInformation* loop_info = GetLoopInformation();
+  // TODO: Remove the null check b/19084197.
+  return GetFirstInstruction() != nullptr
+         && GetPhis().IsEmpty()
+         && GetFirstInstruction() == GetLastInstruction()
+         && GetLastInstruction()->IsGoto()
+         // Back edges generate the suspend check.
+         && (loop_info == nullptr || !loop_info->IsBackEdge(*this));
+}
+
 void HInstructionList::SetBlockOfInstructions(HBasicBlock* block) const {
   for (HInstruction* current = first_instruction_;
        current != nullptr;
@@ -811,14 +855,35 @@
 }
 
 void HInstructionList::Add(const HInstructionList& instruction_list) {
-  DCHECK(!IsEmpty());
-  AddAfter(last_instruction_, instruction_list);
+  if (IsEmpty()) {
+    first_instruction_ = instruction_list.first_instruction_;
+    last_instruction_ = instruction_list.last_instruction_;
+  } else {
+    AddAfter(last_instruction_, instruction_list);
+  }
+}
+
+void HBasicBlock::DisconnectFromAll() {
+  DCHECK(dominated_blocks_.IsEmpty()) << "Unimplemented scenario";
+
+  for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
+    predecessors_.Get(i)->successors_.Delete(this);
+  }
+  for (size_t i = 0, e = successors_.Size(); i < e; ++i) {
+    successors_.Get(i)->predecessors_.Delete(this);
+  }
+  dominator_->dominated_blocks_.Delete(this);
+
+  predecessors_.Reset();
+  successors_.Reset();
+  dominator_ = nullptr;
+  graph_ = nullptr;
 }
 
 void HBasicBlock::MergeWith(HBasicBlock* other) {
   DCHECK(successors_.IsEmpty()) << "Unimplemented block merge scenario";
-  DCHECK(dominated_blocks_.IsEmpty()) << "Unimplemented block merge scenario";
-  DCHECK(other->GetDominator()->IsEntryBlock() && other->GetGraph() != graph_)
+  DCHECK(dominated_blocks_.IsEmpty()
+         || (dominated_blocks_.Size() == 1 && dominated_blocks_.Get(0) == other))
       << "Unimplemented block merge scenario";
   DCHECK(other->GetPhis().IsEmpty());
 
@@ -1006,7 +1071,7 @@
     if (info != nullptr) {
       info->Add(to);
       to->SetLoopInformation(info);
-      if (info->IsBackEdge(at)) {
+      if (info->IsBackEdge(*at)) {
         // Only `at` can become a back edge, as the inlined blocks
         // are predecessors of `at`.
         DCHECK_EQ(1u, info->NumberOfBackEdges());
@@ -1020,6 +1085,57 @@
   invoke->GetBlock()->RemoveInstruction(invoke);
 }
 
+void HGraph::MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block) {
+  // Make sure this is a diamond control-flow path, find the two branches.
+  DCHECK_EQ(start_block->GetSuccessors().Size(), 2u);
+  DCHECK_EQ(end_block->GetPredecessors().Size(), 2u);
+  HBasicBlock* left_branch = start_block->GetSuccessors().Get(0);
+  HBasicBlock* right_branch = start_block->GetSuccessors().Get(1);
+  DCHECK_EQ(left_branch->GetSuccessors().Get(0), end_block);
+  DCHECK_EQ(right_branch->GetSuccessors().Get(0), end_block);
+  DCHECK_EQ(start_block, end_block->GetDominator());
+
+  // Disconnect the branches and merge the two blocks. This will move
+  // all instructions from 'end_block' to 'start_block'.
+  DCHECK(left_branch->IsSingleGoto());
+  DCHECK(right_branch->IsSingleGoto());
+  left_branch->DisconnectFromAll();
+  right_branch->DisconnectFromAll();
+  start_block->RemoveInstruction(start_block->GetLastInstruction());
+  start_block->MergeWith(end_block);
+
+  // Delete the now redundant blocks from the graph.
+  blocks_.Put(left_branch->GetBlockId(), nullptr);
+  blocks_.Put(right_branch->GetBlockId(), nullptr);
+  blocks_.Put(end_block->GetBlockId(), nullptr);
+
+  // Update reverse post order.
+  reverse_post_order_.Delete(left_branch);
+  reverse_post_order_.Delete(right_branch);
+  reverse_post_order_.Delete(end_block);
+
+  // Update loop information.
+  HLoopInformation* loop_info = start_block->GetLoopInformation();
+  if (kIsDebugBuild) {
+    if (loop_info != nullptr) {
+      DCHECK_EQ(loop_info, left_branch->GetLoopInformation());
+      DCHECK_EQ(loop_info, right_branch->GetLoopInformation());
+      DCHECK_EQ(loop_info, end_block->GetLoopInformation());
+    }
+  }
+  while (loop_info != nullptr) {
+    loop_info->Remove(left_branch);
+    loop_info->Remove(right_branch);
+    loop_info->Remove(end_block);
+    if (loop_info->IsBackEdge(*end_block)) {
+      loop_info->RemoveBackEdge(end_block);
+      loop_info->AddBackEdge(start_block);
+    }
+    // Move to parent loop if nested.
+    loop_info = loop_info->GetHeader()->GetDominator()->GetLoopInformation();
+  }
+}
+
 std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) {
   ScopedObjectAccess soa(Thread::Current());
   os << "["
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 07ff8ba..97ade0d 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -128,6 +128,7 @@
   void SetExitBlock(HBasicBlock* block) { exit_block_ = block; }
 
   void AddBlock(HBasicBlock* block);
+  void AddConstant(HConstant* instruction);
 
   // Try building the SSA form of this graph, with dominance computation and loop
   // recognition. Returns whether it was successful in doing all these steps.
@@ -154,6 +155,8 @@
   // Inline this graph in `outer_graph`, replacing the given `invoke` instruction.
   void InlineInto(HGraph* outer_graph, HInvoke* invoke);
 
+  void MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block);
+
   void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor);
   void SimplifyLoop(HBasicBlock* header);
 
@@ -217,6 +220,8 @@
   bool IsDebuggable() const { return debuggable_; }
 
   HNullConstant* GetNullConstant();
+  HIntConstant* GetIntConstant0();
+  HIntConstant* GetIntConstant1();
 
  private:
   HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
@@ -267,6 +272,10 @@
   // Cached null constant that might be created when building SSA form.
   HNullConstant* cached_null_constant_;
 
+  // Cached common constants often needed by optimization passes.
+  HIntConstant* cached_int_constant0_;
+  HIntConstant* cached_int_constant1_;
+
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
   DISALLOW_COPY_AND_ASSIGN(HGraph);
 };
@@ -300,9 +309,9 @@
     back_edges_.Delete(back_edge);
   }
 
-  bool IsBackEdge(HBasicBlock* block) {
+  bool IsBackEdge(const HBasicBlock& block) const {
     for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) {
-      if (back_edges_.Get(i) == block) return true;
+      if (back_edges_.Get(i) == &block) return true;
     }
     return false;
   }
@@ -336,6 +345,7 @@
   const ArenaBitVector& GetBlocks() const { return blocks_; }
 
   void Add(HBasicBlock* block);
+  void Remove(HBasicBlock* block);
 
  private:
   // Internal recursive implementation of `Populate`.
@@ -391,6 +401,8 @@
     return graph_->GetExitBlock() == this;
   }
 
+  bool IsSingleGoto() const;
+
   void AddBackEdge(HBasicBlock* back_edge) {
     if (loop_information_ == nullptr) {
       loop_information_ = new (graph_->GetArena()) HLoopInformation(this, graph_);
@@ -512,8 +524,16 @@
   // of `this` are moved to `other`.
   // Note that this method does not update the graph, reverse post order, loop
   // information, nor make sure the blocks are consistent (for example ending
+  // with a control flow instruction).
   void ReplaceWith(HBasicBlock* other);
 
+  // Disconnects `this` from all its predecessors, successors and the dominator.
+  // It assumes that `this` does not dominate any blocks.
+  // Note that this method does not update the graph, reverse post order, loop
+  // information, nor make sure the blocks are consistent (for example ending
+  // with a control flow instruction).
+  void DisconnectFromAll();
+
   void AddInstruction(HInstruction* instruction);
   void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
   // Replace instruction `initial` with `replacement` within this block.
@@ -1200,8 +1220,6 @@
     return NeedsEnvironment() || IsLoadClass() || IsLoadString();
   }
 
-  virtual bool NeedsDexCache() const { return false; }
-
  protected:
   virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0;
   virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0;
@@ -1875,20 +1893,22 @@
   float GetValue() const { return value_; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return bit_cast<float, int32_t>(other->AsFloatConstant()->value_) ==
-        bit_cast<float, int32_t>(value_);
+    return bit_cast<uint32_t, float>(other->AsFloatConstant()->value_) ==
+        bit_cast<uint32_t, float>(value_);
   }
 
   size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
 
   bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint32_t>(AsFloatConstant()->GetValue()) == bit_cast<uint32_t>((-1.0f));
+    return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) ==
+        bit_cast<uint32_t, float>((-1.0f));
   }
   bool IsZero() const OVERRIDE {
     return AsFloatConstant()->GetValue() == 0.0f;
   }
   bool IsOne() const OVERRIDE {
-    return bit_cast<uint32_t>(AsFloatConstant()->GetValue()) == bit_cast<uint32_t>(1.0f);
+    return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) ==
+        bit_cast<uint32_t, float>(1.0f);
   }
 
   DECLARE_INSTRUCTION(FloatConstant);
@@ -1906,20 +1926,22 @@
   double GetValue() const { return value_; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return bit_cast<double, int64_t>(other->AsDoubleConstant()->value_) ==
-        bit_cast<double, int64_t>(value_);
+    return bit_cast<uint64_t, double>(other->AsDoubleConstant()->value_) ==
+        bit_cast<uint64_t, double>(value_);
   }
 
   size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
 
   bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint64_t>(AsDoubleConstant()->GetValue()) == bit_cast<uint64_t>((-1.0));
+    return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) ==
+        bit_cast<uint64_t, double>((-1.0));
   }
   bool IsZero() const OVERRIDE {
     return AsDoubleConstant()->GetValue() == 0.0;
   }
   bool IsOne() const OVERRIDE {
-    return bit_cast<uint64_t>(AsDoubleConstant()->GetValue()) == bit_cast<uint64_t>(1.0);
+    return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) ==
+        bit_cast<uint64_t, double>(1.0);
   }
 
   DECLARE_INSTRUCTION(DoubleConstant);
@@ -2092,7 +2114,6 @@
 
   InvokeType GetInvokeType() const { return invoke_type_; }
   bool IsRecursive() const { return is_recursive_; }
-  bool NeedsDexCache() const OVERRIDE { return !IsRecursive(); }
 
   DECLARE_INSTRUCTION(InvokeStaticOrDirect);
 
@@ -2975,8 +2996,6 @@
     return loaded_class_rti_.IsExact();
   }
 
-  bool NeedsDexCache() const OVERRIDE { return !is_referrers_class_; }
-
   DECLARE_INSTRUCTION(LoadClass);
 
  private:
@@ -3012,7 +3031,6 @@
 
   // TODO: Can we deopt or debug when we resolve a string?
   bool NeedsEnvironment() const OVERRIDE { return false; }
-  bool NeedsDexCache() const OVERRIDE { return true; }
 
   DECLARE_INSTRUCTION(LoadString);
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index f5d6598..eaa30df 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -22,6 +22,7 @@
 #include "base/arena_allocator.h"
 #include "base/dumpable.h"
 #include "base/timing_logger.h"
+#include "boolean_simplifier.h"
 #include "bounds_check_elimination.h"
 #include "builder.h"
 #include "code_generator.h"
@@ -313,6 +314,7 @@
   HDeadCodeElimination dce(graph);
   HConstantFolding fold1(graph);
   InstructionSimplifier simplify1(graph, stats);
+  HBooleanSimplifier boolean_not(graph);
 
   HInliner inliner(graph, dex_compilation_unit, driver, stats);
 
@@ -331,6 +333,9 @@
     &dce,
     &fold1,
     &simplify1,
+    // BooleanSimplifier depends on the InstructionSimplifier removing redundant
+    // suspend checks to recognize empty blocks.
+    &boolean_not,
     &inliner,
     &fold2,
     &side_effects,
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index ba11e90..ae6bf16 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -359,12 +359,12 @@
   if (result == nullptr) {
     HGraph* graph = constant->GetBlock()->GetGraph();
     ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HFloatConstant(bit_cast<int32_t, float>(constant->GetValue()));
+    result = new (allocator) HFloatConstant(bit_cast<float, int32_t>(constant->GetValue()));
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
-    DCHECK_EQ((bit_cast<float, int32_t>(result->GetValue())), constant->GetValue());
+    DCHECK_EQ((bit_cast<int32_t, float>(result->GetValue())), constant->GetValue());
   }
   return result;
 }
@@ -381,12 +381,12 @@
   if (result == nullptr) {
     HGraph* graph = constant->GetBlock()->GetGraph();
     ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HDoubleConstant(bit_cast<int64_t, double>(constant->GetValue()));
+    result = new (allocator) HDoubleConstant(bit_cast<double, int64_t>(constant->GetValue()));
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
-    DCHECK_EQ((bit_cast<double, int64_t>(result->GetValue())), constant->GetValue());
+    DCHECK_EQ((bit_cast<int64_t, double>(result->GetValue())), constant->GetValue());
   }
   return result;
 }
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index c0d6f42..56ccd71 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -71,8 +71,8 @@
   //      for it.
   GrowableArray<uint32_t> forward_predecessors(graph_.GetArena(), graph_.GetBlocks().Size());
   forward_predecessors.SetSize(graph_.GetBlocks().Size());
-  for (size_t i = 0, e = graph_.GetBlocks().Size(); i < e; ++i) {
-    HBasicBlock* block = graph_.GetBlocks().Get(i);
+  for (HReversePostOrderIterator it(graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
     size_t number_of_forward_predecessors = block->GetPredecessors().Size();
     if (block->IsLoopHeader()) {
       // We rely on having simplified the CFG.
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 6296cf5..7144577 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -307,7 +307,6 @@
 // Runtime JDWP state.
 static JDWP::JdwpState* gJdwpState = nullptr;
 static bool gDebuggerConnected;  // debugger or DDMS is connected.
-static bool gDebuggerActive;     // debugger is making requests.
 static bool gDisposed;           // debugger called VirtualMachine.Dispose, so we should drop the connection.
 
 static bool gDdmThreadNotification = false;
@@ -319,6 +318,7 @@
 static Dbg::HpsgWhen gDdmNhsgWhen = Dbg::HPSG_WHEN_NEVER;
 static Dbg::HpsgWhat gDdmNhsgWhat;
 
+bool Dbg::gDebuggerActive = false;
 ObjectRegistry* Dbg::gRegistry = nullptr;
 
 // Recent allocation tracking.
@@ -331,7 +331,6 @@
 // Deoptimization support.
 std::vector<DeoptimizationRequest> Dbg::deoptimization_requests_;
 size_t Dbg::full_deoptimization_event_count_ = 0;
-size_t Dbg::delayed_full_undeoptimization_count_ = 0;
 
 // Instrumentation event reference counters.
 size_t Dbg::dex_pc_change_event_ref_count_ = 0;
@@ -620,7 +619,7 @@
   // Enable all debugging features, including scans for breakpoints.
   // This is a no-op if we're already active.
   // Only called from the JDWP handler thread.
-  if (gDebuggerActive) {
+  if (IsDebuggerActive()) {
     return;
   }
 
@@ -634,7 +633,6 @@
     MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
     CHECK_EQ(deoptimization_requests_.size(), 0U);
     CHECK_EQ(full_deoptimization_event_count_, 0U);
-    CHECK_EQ(delayed_full_undeoptimization_count_, 0U);
     CHECK_EQ(dex_pc_change_event_ref_count_, 0U);
     CHECK_EQ(method_enter_event_ref_count_, 0U);
     CHECK_EQ(method_exit_event_ref_count_, 0U);
@@ -673,7 +671,7 @@
   ThreadState old_state = self->SetStateUnsafe(kRunnable);
 
   // Debugger may not be active at this point.
-  if (gDebuggerActive) {
+  if (IsDebuggerActive()) {
     {
       // Since we're going to disable deoptimization, we clear the deoptimization requests queue.
       // This prevents us from having any pending deoptimization request when the debugger attaches
@@ -681,7 +679,6 @@
       MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
       deoptimization_requests_.clear();
       full_deoptimization_event_count_ = 0U;
-      delayed_full_undeoptimization_count_ = 0U;
     }
     if (instrumentation_events_ != 0) {
       runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener,
@@ -704,10 +701,6 @@
   gDebuggerConnected = false;
 }
 
-bool Dbg::IsDebuggerActive() {
-  return gDebuggerActive;
-}
-
 void Dbg::ConfigureJdwp(const JDWP::JdwpOptions& jdwp_options) {
   CHECK_NE(jdwp_options.transport, JDWP::kJdwpTransportUnknown);
   gJdwpOptions = jdwp_options;
@@ -3020,29 +3013,6 @@
   }
 }
 
-void Dbg::DelayFullUndeoptimization() {
-  if (RequiresDeoptimization()) {
-    MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
-    ++delayed_full_undeoptimization_count_;
-    DCHECK_LE(delayed_full_undeoptimization_count_, full_deoptimization_event_count_);
-  }
-}
-
-void Dbg::ProcessDelayedFullUndeoptimizations() {
-  // TODO: avoid taking the lock twice (once here and once in ManageDeoptimization).
-  {
-    MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
-    while (delayed_full_undeoptimization_count_ > 0) {
-      DeoptimizationRequest req;
-      req.SetKind(DeoptimizationRequest::kFullUndeoptimization);
-      req.SetMethod(nullptr);
-      RequestDeoptimizationLocked(req);
-      --delayed_full_undeoptimization_count_;
-    }
-  }
-  ManageDeoptimization();
-}
-
 void Dbg::RequestDeoptimization(const DeoptimizationRequest& req) {
   if (req.GetKind() == DeoptimizationRequest::kNothing) {
     // Nothing to do.
@@ -3352,6 +3322,125 @@
   }
 }
 
+bool Dbg::IsForcedInterpreterNeededForCallingImpl(Thread* thread, mirror::ArtMethod* m) {
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc == nullptr) {
+    // If we are not single-stepping, then we don't have to force interpreter.
+    return false;
+  }
+  if (Runtime::Current()->GetInstrumentation()->InterpretOnly()) {
+    // If we are in interpreter only mode, then we don't have to force interpreter.
+    return false;
+  }
+
+  if (!m->IsNative() && !m->IsProxyMethod()) {
+    // If we want to step into a method, then we have to force interpreter on that call.
+    if (ssc->GetStepDepth() == JDWP::SD_INTO) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Dbg::IsForcedInterpreterNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m) {
+  instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  // If we are in interpreter only mode, then we don't have to force interpreter.
+  if (instrumentation->InterpretOnly()) {
+    return false;
+  }
+  // We can only interpret pure Java method.
+  if (m->IsNative() || m->IsProxyMethod()) {
+    return false;
+  }
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc != nullptr) {
+    // If we want to step into a method, then we have to force interpreter on that call.
+    if (ssc->GetStepDepth() == JDWP::SD_INTO) {
+      return true;
+    }
+    // If we are stepping out from a static initializer, by issuing a step
+    // in or step over, that was implicitly invoked by calling a static method,
+    // then we need to step into that method. Having a lower stack depth than
+    // the one the single step control has indicates that the step originates
+    // from the static initializer.
+    if (ssc->GetStepDepth() != JDWP::SD_OUT &&
+        ssc->GetStackDepth() > GetStackDepth(thread)) {
+      return true;
+    }
+  }
+  // There are cases where we have to force interpreter on deoptimized methods,
+  // because in some cases the call will not be performed by invoking an entry
+  // point that has been replaced by the deoptimization, but instead by directly
+  // invoking the compiled code of the method, for example.
+  return instrumentation->IsDeoptimized(m);
+}
+
+bool Dbg::IsForcedInstrumentationNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m) {
+  // The upcall can be nullptr and in that case we don't need to do anything.
+  if (m == nullptr) {
+    return false;
+  }
+  instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  // If we are in interpreter only mode, then we don't have to force interpreter.
+  if (instrumentation->InterpretOnly()) {
+    return false;
+  }
+  // We can only interpret pure Java method.
+  if (m->IsNative() || m->IsProxyMethod()) {
+    return false;
+  }
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc != nullptr) {
+    // If we are stepping out from a static initializer, by issuing a step
+    // out, that was implicitly invoked by calling a static method, then we
+    // need to step into the caller of that method. Having a lower stack
+    // depth than the one the single step control has indicates that the
+    // step originates from the static initializer.
+    if (ssc->GetStepDepth() == JDWP::SD_OUT &&
+        ssc->GetStackDepth() > GetStackDepth(thread)) {
+      return true;
+    }
+  }
+  // If we are returning from a static initializer, that was implicitly
+  // invoked by calling a static method and the caller is deoptimized,
+  // then we have to deoptimize the stack without forcing interpreter
+  // on the static method that was called originally. This problem can
+  // be solved easily by forcing instrumentation on the called method,
+  // because the instrumentation exit hook will recognise the need of
+  // stack deoptimization by calling IsForcedInterpreterNeededForUpcall.
+  return instrumentation->IsDeoptimized(m);
+}
+
+bool Dbg::IsForcedInterpreterNeededForUpcallImpl(Thread* thread, mirror::ArtMethod* m) {
+  // The upcall can be nullptr and in that case we don't need to do anything.
+  if (m == nullptr) {
+    return false;
+  }
+  instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  // If we are in interpreter only mode, then we don't have to force interpreter.
+  if (instrumentation->InterpretOnly()) {
+    return false;
+  }
+  // We can only interpret pure Java method.
+  if (m->IsNative() || m->IsProxyMethod()) {
+    return false;
+  }
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc != nullptr) {
+    // The debugger is not interested in what is happening below the level
+    // of the step, so we only force the interpreter when we are not below
+    // the step.
+    if (ssc->GetStackDepth() >= GetStackDepth(thread)) {
+      return true;
+    }
+  }
+  // We have to require stack deoptimization if the upcall is deoptimized.
+  return instrumentation->IsDeoptimized(m);
+}
+
 // Scoped utility class to suspend a thread so that we may do tasks such as walk its stack. Doesn't
 // cause suspension if the thread is the current thread.
 class ScopedThreadSuspension {
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 01c9d5d..d015294 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -243,7 +243,9 @@
 
   // Returns true if we're actually debugging with a real debugger, false if it's
   // just DDMS (or nothing at all).
-  static bool IsDebuggerActive();
+  static bool IsDebuggerActive() {
+    return gDebuggerActive;
+  }
 
   // Configures JDWP with parsed command-line options.
   static void ConfigureJdwp(const JDWP::JdwpOptions& jdwp_options);
@@ -543,13 +545,6 @@
       LOCKS_EXCLUDED(Locks::deoptimization_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Support delayed full undeoptimization requests. This is currently only used for single-step
-  // events.
-  static void DelayFullUndeoptimization() LOCKS_EXCLUDED(Locks::deoptimization_lock_);
-  static void ProcessDelayedFullUndeoptimizations()
-      LOCKS_EXCLUDED(Locks::deoptimization_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Manage deoptimization after updating JDWP events list. Suspends all threads, processes each
   // request and finally resumes all threads.
   static void ManageDeoptimization()
@@ -564,6 +559,53 @@
       LOCKS_EXCLUDED(Locks::breakpoint_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  /*
+   * Forced interpreter checkers for single-step and continue support.
+   */
+
+  // Indicates whether we need to force the use of the interpreter to invoke a method.
+  // This allows single-stepping or continuing into the called method.
+  static bool IsForcedInterpreterNeededForCalling(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInterpreterNeededForCallingImpl(thread, m);
+  }
+
+  // Indicates whether we need to force the use of the interpreter entrypoint when calling
+  // a method through the resolution trampoline. This allows single-stepping or continuing
+  // into the called method.
+  static bool IsForcedInterpreterNeededForResolution(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInterpreterNeededForResolutionImpl(thread, m);
+  }
+
+  // Indicates whether we need to force the use of the instrumentation entrypoint when
+  // calling a method through the resolution trampoline. This allows deoptimizing the stack
+  // for debugging when we return from the called method.
+  static bool IsForcedInstrumentationNeededForResolution(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInstrumentationNeededForResolutionImpl(thread, m);
+  }
+
+  // Indicates whether we need to force the use of the interpreter when returning from the
+  // interpreter into the runtime. This allows deoptimizing the stack and continuing
+  // execution with the interpreter for debugging.
+  static bool IsForcedInterpreterNeededForUpcall(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInterpreterNeededForUpcallImpl(thread, m);
+  }
+
   // Single-stepping.
   static JDWP::JdwpError ConfigureStep(JDWP::ObjectId thread_id, JDWP::JdwpStepSize size,
                                        JDWP::JdwpStepDepth depth)
@@ -690,11 +732,27 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::deoptimization_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static bool IsForcedInterpreterNeededForCallingImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool IsForcedInterpreterNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool IsForcedInstrumentationNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool IsForcedInterpreterNeededForUpcallImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static AllocRecord* recent_allocation_records_ PT_GUARDED_BY(Locks::alloc_tracker_lock_);
   static size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_);
   static size_t alloc_record_head_ GUARDED_BY(Locks::alloc_tracker_lock_);
   static size_t alloc_record_count_ GUARDED_BY(Locks::alloc_tracker_lock_);
 
+  // Indicates whether the debugger is making requests.
+  static bool gDebuggerActive;
+
+  // The registry mapping objects to JDWP ids.
   static ObjectRegistry* gRegistry;
 
   // Deoptimization requests to be processed each time the event list is updated. This is used when
@@ -709,10 +767,6 @@
   // undeoptimize when the last event is unregistered (when the counter is set to 0).
   static size_t full_deoptimization_event_count_ GUARDED_BY(Locks::deoptimization_lock_);
 
-  // Count the number of full undeoptimization requests delayed to next resume or end of debug
-  // session.
-  static size_t delayed_full_undeoptimization_count_ GUARDED_BY(Locks::deoptimization_lock_);
-
   static size_t* GetReferenceCounterForEvent(uint32_t instrumentation_event);
 
   // Weak global type cache, TODO improve this.
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 70ee042..8351e22 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -30,6 +30,7 @@
 #include "mirror/object_array-inl.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
+#include "debugger.h"
 
 namespace art {
 
@@ -639,6 +640,14 @@
     JValue result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame);
     // Pop transition.
     self->PopManagedStackFragment(fragment);
+
+    // Request a stack deoptimization if needed
+    mirror::ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
+    if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
+      self->SetException(Thread::GetDeoptimizationException());
+      self->SetDeoptimizationReturnValue(result);
+    }
+
     // No need to restore the args since the method has already been run by the interpreter.
     return result.GetJ();
   }
@@ -950,14 +959,37 @@
         called->GetDexCache()->SetResolvedMethod(called_dex_method_idx, called);
       }
     }
+
     // Ensure that the called method's class is initialized.
     StackHandleScope<1> hs(soa.Self());
     Handle<mirror::Class> called_class(hs.NewHandle(called->GetDeclaringClass()));
     linker->EnsureInitialized(soa.Self(), called_class, true, true);
     if (LIKELY(called_class->IsInitialized())) {
-      code = called->GetEntryPointFromQuickCompiledCode();
+      if (UNLIKELY(Dbg::IsForcedInterpreterNeededForResolution(self, called))) {
+        // If we are single-stepping or the called method is deoptimized (by a
+        // breakpoint, for example), then we have to execute the called method
+        // with the interpreter.
+        code = GetQuickToInterpreterBridge();
+      } else if (UNLIKELY(Dbg::IsForcedInstrumentationNeededForResolution(self, caller))) {
+        // If the caller is deoptimized (by a breakpoint, for example), we have to
+        // continue its execution with interpreter when returning from the called
+        // method. Because we do not want to execute the called method with the
+        // interpreter, we wrap its execution into the instrumentation stubs.
+        // When the called method returns, it will execute the instrumentation
+        // exit hook that will determine the need of the interpreter with a call
+        // to Dbg::IsForcedInterpreterNeededForUpcall and deoptimize the stack if
+        // it is needed.
+        code = GetQuickInstrumentationEntryPoint();
+      } else {
+        code = called->GetEntryPointFromQuickCompiledCode();
+      }
     } else if (called_class->IsInitializing()) {
-      if (invoke_type == kStatic) {
+      if (UNLIKELY(Dbg::IsForcedInterpreterNeededForResolution(self, called))) {
+        // If we are single-stepping or the called method is deoptimized (by a
+        // breakpoint, for example), then we have to execute the called method
+        // with the interpreter.
+        code = GetQuickToInterpreterBridge();
+      } else if (invoke_type == kStatic) {
         // Class is still initializing, go to oat and grab code (trampoline must be left in place
         // until class is initialized to stop races between threads).
         code = linker->GetQuickOatCodeFor(called);
@@ -1152,7 +1184,7 @@
       gpr_index_--;
       if (kMultiGPRegistersWidened) {
         DCHECK_EQ(sizeof(uintptr_t), sizeof(int64_t));
-        PushGpr(static_cast<int64_t>(bit_cast<uint32_t, int32_t>(val)));
+        PushGpr(static_cast<int64_t>(bit_cast<int32_t, uint32_t>(val)));
       } else {
         PushGpr(val);
       }
@@ -1160,7 +1192,7 @@
       stack_entries_++;
       if (kMultiGPRegistersWidened) {
         DCHECK_EQ(sizeof(uintptr_t), sizeof(int64_t));
-        PushStack(static_cast<int64_t>(bit_cast<uint32_t, int32_t>(val)));
+        PushStack(static_cast<int64_t>(bit_cast<int32_t, uint32_t>(val)));
       } else {
         PushStack(val);
       }
@@ -1220,16 +1252,16 @@
 
   void AdvanceFloat(float val) {
     if (kNativeSoftFloatAbi) {
-      AdvanceInt(bit_cast<float, uint32_t>(val));
+      AdvanceInt(bit_cast<uint32_t, float>(val));
     } else {
       if (HaveFloatFpr()) {
         fpr_index_--;
         if (kRegistersNeededForDouble == 1) {
           if (kMultiFPRegistersWidened) {
-            PushFpr8(bit_cast<double, uint64_t>(val));
+            PushFpr8(bit_cast<uint64_t, double>(val));
           } else {
             // No widening, just use the bits.
-            PushFpr8(bit_cast<float, uint64_t>(val));
+            PushFpr8(static_cast<uint64_t>(bit_cast<uint32_t, float>(val)));
           }
         } else {
           PushFpr4(val);
@@ -1240,9 +1272,9 @@
           // Need to widen before storing: Note the "double" in the template instantiation.
           // Note: We need to jump through those hoops to make the compiler happy.
           DCHECK_EQ(sizeof(uintptr_t), sizeof(uint64_t));
-          PushStack(static_cast<uintptr_t>(bit_cast<double, uint64_t>(val)));
+          PushStack(static_cast<uintptr_t>(bit_cast<uint64_t, double>(val)));
         } else {
-          PushStack(bit_cast<float, uintptr_t>(val));
+          PushStack(static_cast<uintptr_t>(bit_cast<uint32_t, float>(val)));
         }
         fpr_index_ = 0;
       }
@@ -1876,8 +1908,8 @@
       case 'F': {
         if (kRuntimeISA == kX86) {
           // Convert back the result to float.
-          double d = bit_cast<uint64_t, double>(result_f);
-          return bit_cast<float, uint32_t>(static_cast<float>(d));
+          double d = bit_cast<double, uint64_t>(result_f);
+          return bit_cast<uint32_t, float>(static_cast<float>(d));
         } else {
           return result_f;
         }
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index b53b8cd..9adb4ac 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1030,7 +1030,8 @@
   NthCallerVisitor visitor(self, 1, true);
   visitor.WalkStack(true);
   bool deoptimize = (visitor.caller != nullptr) &&
-                    (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller));
+                    (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
+                    Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller));
   if (deoptimize) {
     if (kVerboseInstrumentation) {
       LOG(INFO) << StringPrintf("Deoptimizing %s by returning from %s with result %#" PRIx64 " in ",
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 26ab602..a3ab026 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -18,6 +18,7 @@
 
 #include <cmath>
 
+#include "debugger.h"
 #include "mirror/array-inl.h"
 #include "unstarted_runtime.h"
 
@@ -616,8 +617,14 @@
           << PrettyMethod(new_shadow_frame->GetMethod());
       UNREACHABLE();
     }
-    (new_shadow_frame->GetMethod()->GetEntryPointFromInterpreter())(self, code_item,
-                                                                    new_shadow_frame, result);
+    // Force the use of interpreter when it is required by the debugger.
+    mirror::EntryPointFromInterpreter* entry;
+    if (UNLIKELY(Dbg::IsForcedInterpreterNeededForCalling(self, new_shadow_frame->GetMethod()))) {
+      entry = &art::artInterpreterToInterpreterBridge;
+    } else {
+      entry = new_shadow_frame->GetMethod()->GetEntryPointFromInterpreter();
+    }
+    entry(self, code_item, new_shadow_frame, result);
   } else {
     UnstartedRuntimeInvoke(self, code_item, new_shadow_frame, result, first_dest_reg);
   }
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index fbbc863..98dfdbd 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -460,7 +460,7 @@
 static void UnstartedDoubleDoubleToRawLongBits(
     Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   double in = shadow_frame->GetVRegDouble(arg_offset);
-  result->SetJ(bit_cast<int64_t>(in));
+  result->SetJ(bit_cast<int64_t, double>(in));
 }
 
 static mirror::Object* GetDexFromDexCache(Thread* self, mirror::DexCache* dex_cache)
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 4bf7142..c9a4483 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -133,7 +133,6 @@
       case EK_METHOD_ENTRY:
       case EK_METHOD_EXIT:
       case EK_METHOD_EXIT_WITH_RETURN_VALUE:
-      case EK_SINGLE_STEP:
       case EK_FIELD_ACCESS:
       case EK_FIELD_MODIFICATION:
         return true;
@@ -278,16 +277,7 @@
         Dbg::UnconfigureStep(pMod->step.threadId);
       }
     }
-    if (pEvent->eventKind == EK_SINGLE_STEP) {
-      // Special case for single-steps where we want to avoid the slow pattern deoptimize/undeoptimize
-      // loop between each single-step. In a IDE, this would happens each time the user click on the
-      // "single-step" button. Here we delay the full undeoptimization to the next resume
-      // (VM.Resume or ThreadReference.Resume) or the end of the debugging session (VM.Dispose or
-      // runtime shutdown).
-      // Therefore, in a singles-stepping sequence, only the first single-step will trigger a full
-      // deoptimization and only the last single-step will trigger a full undeoptimization.
-      Dbg::DelayFullUndeoptimization();
-    } else if (NeedsFullDeoptimization(pEvent->eventKind)) {
+    if (NeedsFullDeoptimization(pEvent->eventKind)) {
       CHECK_EQ(req.GetKind(), DeoptimizationRequest::kNothing);
       CHECK(req.Method() == nullptr);
       req.SetKind(DeoptimizationRequest::kFullUndeoptimization);
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index c7083dc..add1394 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -295,7 +295,6 @@
  */
 static JdwpError VM_Resume(JdwpState*, Request*, ExpandBuf*)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  Dbg::ProcessDelayedFullUndeoptimizations();
   Dbg::ResumeVM();
   return ERR_NONE;
 }
@@ -989,8 +988,6 @@
     return ERR_NONE;
   }
 
-  Dbg::ProcessDelayedFullUndeoptimizations();
-
   Dbg::ResumeThread(thread_id);
   return ERR_NONE;
 }
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 3d69796..e2b88a5 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -322,8 +322,6 @@
     CHECK(event_list_ == nullptr);
   }
 
-  Dbg::ProcessDelayedFullUndeoptimizations();
-
   /*
    * Should not have one of these in progress.  If the debugger went away
    * mid-request, though, we could see this.
diff --git a/runtime/memory_region.h b/runtime/memory_region.h
index 939a1a9..f867f6a 100644
--- a/runtime/memory_region.h
+++ b/runtime/memory_region.h
@@ -19,6 +19,7 @@
 
 #include <stdint.h>
 
+#include "base/casts.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/value_object.h"
@@ -60,23 +61,6 @@
     *ComputeInternalPointer<T>(offset) = value;
   }
 
-  // TODO: Local hack to prevent name clashes between two conflicting
-  // implementations of bit_cast:
-  // - art::bit_cast<Destination, Source> runtime/base/casts.h, and
-  // - art::bit_cast<Source, Destination> from runtime/utils.h.
-  // Remove this when these routines have been merged.
-  template<typename Source, typename Destination>
-  static Destination local_bit_cast(Source in) {
-    static_assert(sizeof(Source) <= sizeof(Destination),
-                  "Size of Source not <= size of Destination");
-    union {
-      Source u;
-      Destination v;
-    } tmp;
-    tmp.u = in;
-    return tmp.v;
-  }
-
   // Load value of type `T` at `offset`.  The memory address corresponding
   // to `offset` does not need to be word-aligned.
   template<typename T> T LoadUnaligned(uintptr_t offset) const {
@@ -88,7 +72,7 @@
       equivalent_unsigned_integer_value +=
           *ComputeInternalPointer<uint8_t>(offset + i) << (i * kBitsPerByte);
     }
-    return local_bit_cast<U, T>(equivalent_unsigned_integer_value);
+    return bit_cast<T, U>(equivalent_unsigned_integer_value);
   }
 
   // Store `value` (of type `T`) at `offset`.  The memory address
@@ -96,7 +80,7 @@
   template<typename T> void StoreUnaligned(uintptr_t offset, T value) const {
     // Equivalent unsigned integer type corresponding to T.
     typedef typename UnsignedIntegerType<sizeof(T)>::type U;
-    U equivalent_unsigned_integer_value = local_bit_cast<T, U>(value);
+    U equivalent_unsigned_integer_value = bit_cast<U, T>(value);
     // Write the value byte by byte in a little-endian fashion.
     for (size_t i = 0; i < sizeof(U); ++i) {
       *ComputeInternalPointer<uint8_t>(offset + i) =
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index ffee59e..c1f7594 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -401,7 +401,9 @@
 
   Runtime* runtime = Runtime::Current();
   // Call the invoke stub, passing everything as arguments.
-  if (UNLIKELY(!runtime->IsStarted())) {
+  // If the runtime is not yet started, or if it is required by the debugger, then perform
+  // the invocation with the interpreter.
+  if (UNLIKELY(!runtime->IsStarted() || Dbg::IsForcedInterpreterNeededForCalling(self, this))) {
     if (IsStatic()) {
       art::interpreter::EnterInterpreterFromInvoke(self, this, nullptr, args, result);
     } else {
diff --git a/runtime/primitive.h b/runtime/primitive.h
index 2d6b6b3..d11f1e9 100644
--- a/runtime/primitive.h
+++ b/runtime/primitive.h
@@ -153,7 +153,10 @@
   }
 
   static bool IsIntegralType(Type type) {
+    // Java language does not allow treating boolean as an integral type but our
+    // bit representation makes it safe.
     switch (type) {
+      case kPrimBoolean:
       case kPrimByte:
       case kPrimChar:
       case kPrimShort:
diff --git a/runtime/utils.h b/runtime/utils.h
index 9a9f51a..e20412e 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -311,19 +311,6 @@
   return (ch < ' ' || ch > '~');
 }
 
-// Interpret the bit pattern of input (type U) as type V. Requires the size
-// of V >= size of U (compile-time checked).
-template<typename U, typename V>
-static inline V bit_cast(U in) {
-  static_assert(sizeof(U) <= sizeof(V), "Size of U not <= size of V");
-  union {
-    U u;
-    V v;
-  } tmp;
-  tmp.u = in;
-  return tmp.v;
-}
-
 std::string PrintableChar(uint16_t ch);
 
 // Returns an ASCII string corresponding to the given UTF-8 string.
diff --git a/test/454-get-vreg/get_vreg_jni.cc b/test/454-get-vreg/get_vreg_jni.cc
index 937d2fe..6b4bc11 100644
--- a/test/454-get-vreg/get_vreg_jni.cc
+++ b/test/454-get-vreg/get_vreg_jni.cc
@@ -55,7 +55,7 @@
       CHECK_EQ(value, 1u);
 
       CHECK(GetVReg(m, 5, kFloatVReg, &value));
-      uint32_t cast = bit_cast<float, uint32_t>(1.0f);
+      uint32_t cast = bit_cast<uint32_t, float>(1.0f);
       CHECK_EQ(value, cast);
 
       CHECK(GetVReg(m, 6, kIntVReg, &value));
@@ -95,7 +95,7 @@
       CHECK_EQ(value, 0u);
 
       CHECK(GetVRegPair(m, 13, kDoubleLoVReg, kDoubleHiVReg, &value));
-      uint64_t cast = bit_cast<double, uint64_t>(2.0);
+      uint64_t cast = bit_cast<uint64_t, double>(2.0);
       CHECK_EQ(value, cast);
     }
 
diff --git a/test/455-set-vreg/set_vreg_jni.cc b/test/455-set-vreg/set_vreg_jni.cc
index 24d7832..0a83ac0 100644
--- a/test/455-set-vreg/set_vreg_jni.cc
+++ b/test/455-set-vreg/set_vreg_jni.cc
@@ -60,21 +60,21 @@
       CHECK(GetVReg(m, 1, kReferenceVReg, &value));
       CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
 
-      CHECK(SetVReg(m, 2, bit_cast<float, uint32_t>(5.0f), kFloatVReg));
-      CHECK(SetVReg(m, 3, bit_cast<float, uint32_t>(4.0f), kFloatVReg));
-      CHECK(SetVReg(m, 4, bit_cast<float, uint32_t>(3.0f), kFloatVReg));
-      CHECK(SetVReg(m, 5, bit_cast<float, uint32_t>(2.0f), kFloatVReg));
-      CHECK(SetVReg(m, 6, bit_cast<float, uint32_t>(1.0f), kFloatVReg));
+      CHECK(SetVReg(m, 2, bit_cast<uint32_t, float>(5.0f), kFloatVReg));
+      CHECK(SetVReg(m, 3, bit_cast<uint32_t, float>(4.0f), kFloatVReg));
+      CHECK(SetVReg(m, 4, bit_cast<uint32_t, float>(3.0f), kFloatVReg));
+      CHECK(SetVReg(m, 5, bit_cast<uint32_t, float>(2.0f), kFloatVReg));
+      CHECK(SetVReg(m, 6, bit_cast<uint32_t, float>(1.0f), kFloatVReg));
     } else if (m_name.compare("testDoubleVReg") == 0) {
       uint32_t value = 0;
       CHECK(GetVReg(m, 3, kReferenceVReg, &value));
       CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
 
-      CHECK(SetVRegPair(m, 4, bit_cast<double, uint64_t>(5.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 6, bit_cast<double, uint64_t>(4.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 8, bit_cast<double, uint64_t>(3.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 10, bit_cast<double, uint64_t>(2.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 12, bit_cast<double, uint64_t>(1.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 4, bit_cast<uint64_t, double>(5.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 6, bit_cast<uint64_t, double>(4.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 8, bit_cast<uint64_t, double>(3.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 10, bit_cast<uint64_t, double>(2.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 12, bit_cast<uint64_t, double>(1.0), kDoubleLoVReg, kDoubleHiVReg));
     }
 
     return true;
diff --git a/test/457-regs/regs_jni.cc b/test/457-regs/regs_jni.cc
index ce701e8..1b32348 100644
--- a/test/457-regs/regs_jni.cc
+++ b/test/457-regs/regs_jni.cc
@@ -69,7 +69,7 @@
       CHECK_EQ(value, 1u);
 
       CHECK(GetVReg(m, 4, kFloatVReg, &value));
-      uint32_t cast = bit_cast<float, uint32_t>(4.0f);
+      uint32_t cast = bit_cast<uint32_t, float>(4.0f);
       CHECK_EQ(value, cast);
       did_check_ = true;
     } else if (m_name.compare("phiEquivalent") == 0) {
@@ -138,7 +138,7 @@
   std::unique_ptr<Context> context(Context::Create());
   CHECK(soa.Decode<mirror::Object*>(main) == nullptr);
   CHECK_EQ(int_value, 0);
-  int32_t cast = bit_cast<float, int32_t>(float_value);
+  int32_t cast = bit_cast<int32_t, float>(float_value);
   CHECK_EQ(cast, 0);
   TestVisitor visitor(soa.Self(), context.get());
   visitor.WalkStack();
diff --git a/test/462-checker-inlining-across-dex-files/info.txt b/test/462-checker-inlining-across-dex-files/info.txt
deleted file mode 100644
index 57008c3..0000000
--- a/test/462-checker-inlining-across-dex-files/info.txt
+++ /dev/null
@@ -1 +0,0 @@
-Check our inlining heuristics across dex files in optimizing.
diff --git a/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java b/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java
deleted file mode 100644
index 61f4e43..0000000
--- a/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
-* Copyright (C) 2015 The Android Open Source Project
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*      http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-public class OtherDex {
-  public static void emptyMethod() {
-  }
-
-  public static int returnIntMethod() {
-    return 38;
-  }
-
-  public static int returnOtherDexStatic() {
-    return myStatic;
-  }
-
-  public static int returnMainStatic() {
-    return Main.myStatic;
-  }
-
-  public static int recursiveCall() {
-    return recursiveCall();
-  }
-
-  public static String returnString() {
-    return "OtherDex";
-  }
-
-  public static Class returnOtherDexClass() {
-    return OtherDex.class;
-  }
-
-  public static Class returnMainClass() {
-    return Main.class;
-  }
-
-  static int myStatic = 1;
-}
diff --git a/test/462-checker-inlining-across-dex-files/src/Main.java b/test/462-checker-inlining-across-dex-files/src/Main.java
deleted file mode 100644
index 23956c0..0000000
--- a/test/462-checker-inlining-across-dex-files/src/Main.java
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
-* Copyright (C) 2015 The Android Open Source Project
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*      http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-public class Main {
-
-  // CHECK-START: void Main.inlineEmptyMethod() inliner (before)
-  // CHECK-DAG:     [[Invoke:v\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      ReturnVoid
-
-  // CHECK-START: void Main.inlineEmptyMethod() inliner (after)
-  // CHECK-NOT:                      InvokeStaticOrDirect
-
-  public static void inlineEmptyMethod() {
-    OtherDex.emptyMethod();
-  }
-
-  // CHECK-START: int Main.inlineReturnIntMethod() inliner (before)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
-
-  // CHECK-START: int Main.inlineReturnIntMethod() inliner (after)
-  // CHECK-NOT:                      InvokeStaticOrDirect
-
-  // CHECK-START: int Main.inlineReturnIntMethod() inliner (after)
-  // CHECK-DAG:     [[Const38:i\d+]] IntConstant 38
-  // CHECK-DAG:                      Return [ [[Const38]] ]
-
-  public static int inlineReturnIntMethod() {
-    return OtherDex.returnIntMethod();
-  }
-
-  // CHECK-START: int Main.dontInlineOtherDexStatic() inliner (before)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
-
-  // CHECK-START: int Main.dontInlineOtherDexStatic() inliner (after)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
-  public static int dontInlineOtherDexStatic() {
-    return OtherDex.returnOtherDexStatic();
-  }
-
-  // CHECK-START: int Main.inlineMainStatic() inliner (before)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
-
-  // CHECK-START: int Main.inlineMainStatic() inliner (after)
-  // CHECK-DAG:     [[Static:i\d+]]  StaticFieldGet
-  // CHECK-DAG:                      Return [ [[Static]] ]
-  public static int inlineMainStatic() {
-    return OtherDex.returnMainStatic();
-  }
-
-  // CHECK-START: int Main.dontInlineRecursiveCall() inliner (before)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
-
-  // CHECK-START: int Main.dontInlineRecursiveCall() inliner (after)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
-  public static int dontInlineRecursiveCall() {
-    return OtherDex.recursiveCall();
-  }
-
-  // CHECK-START: java.lang.String Main.dontInlineReturnString() inliner (before)
-  // CHECK-DAG:     [[Invoke:l\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
-
-  // CHECK-START: java.lang.String Main.dontInlineReturnString() inliner (after)
-  // CHECK-DAG:     [[Invoke:l\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
-  public static String dontInlineReturnString() {
-    return OtherDex.returnString();
-  }
-
-  // CHECK-START: java.lang.Class Main.dontInlineOtherDexClass() inliner (before)
-  // CHECK-DAG:     [[Invoke:l\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
-
-  // CHECK-START: java.lang.Class Main.dontInlineOtherDexClass() inliner (after)
-  // CHECK-DAG:     [[Invoke:l\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
-  public static Class dontInlineOtherDexClass() {
-    return OtherDex.returnOtherDexClass();
-  }
-
-  // CHECK-START: java.lang.Class Main.inlineMainClass() inliner (before)
-  // CHECK-DAG:     [[Invoke:l\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
-
-  // CHECK-START: java.lang.Class Main.inlineMainClass() inliner (after)
-  // CHECK-DAG:     [[Class:l\d+]]  LoadClass
-  // CHECK-DAG:                      Return [ [[Class]] ]
-  public static Class inlineMainClass() {
-    return OtherDex.returnMainClass();
-  }
-
-  public static void main(String[] args) {
-    inlineEmptyMethod();
-    if (inlineReturnIntMethod() != 38) {
-      throw new Error("Expected 38");
-    }
-
-    if (dontInlineOtherDexStatic() != 1) {
-      throw new Error("Expected 1");
-    }
-
-    if (inlineMainStatic() != 42) {
-      throw new Error("Expected 42");
-    }
-
-    if (dontInlineReturnString() != "OtherDex") {
-      throw new Error("Expected OtherDex");
-    }
-
-    if (dontInlineOtherDexClass() != OtherDex.class) {
-      throw new Error("Expected " + OtherDex.class);
-    }
-
-    if (inlineMainClass() != Main.class) {
-      throw new Error("Expected " + Main.class);
-    }
-  }
-
-  public static int myStatic = 42;
-}
diff --git a/test/462-checker-inlining-across-dex-files/expected.txt b/test/463-checker-boolean-simplifier/expected.txt
similarity index 100%
rename from test/462-checker-inlining-across-dex-files/expected.txt
rename to test/463-checker-boolean-simplifier/expected.txt
diff --git a/test/463-checker-boolean-simplifier/info.txt b/test/463-checker-boolean-simplifier/info.txt
new file mode 100644
index 0000000..9c0493a
--- /dev/null
+++ b/test/463-checker-boolean-simplifier/info.txt
@@ -0,0 +1 @@
+Tests simplification of boolean NOT in the optimizing compiler.
diff --git a/test/463-checker-boolean-simplifier/src/Main.java b/test/463-checker-boolean-simplifier/src/Main.java
new file mode 100644
index 0000000..25f58b4
--- /dev/null
+++ b/test/463-checker-boolean-simplifier/src/Main.java
@@ -0,0 +1,174 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  // Note #1: `javac` flips the conditions of If statements.
+  // Note #2: In the optimizing compiler, the first input of Phi is always
+  //          the fall-through path, i.e. the false branch.
+
+  public static void assertBoolEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /*
+   * Elementary test negating a boolean. Verifies that the condition is replaced,
+   * blocks merged and empty branches removed.
+   */
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (before)
+  // CHECK-DAG:     [[Param:z\d+]]    ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[NotEq:z\d+]]    NotEqual [ [[Param]] [[Const0]] ]
+  // CHECK-DAG:                       If [ [[NotEq]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const1]] [[Const0]] ]
+  // CHECK-DAG:                       Return [ [[Phi]] ]
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (before)
+  // CHECK:                           Goto
+  // CHECK:                           Goto
+  // CHECK:                           Goto
+  // CHECK-NOT:                       Goto
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  // CHECK-DAG:     [[Param:z\d+]]    ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Eq:z\d+]]       Equal [ [[Param]] [[Const0]] ]
+  // CHECK-DAG:                       Return [ [[Eq]] ]
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  // CHECK-NOT:                       NotEqual
+  // CHECK-NOT:                       If
+  // CHECK-NOT:                       Phi
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  // CHECK:                           Goto
+  // CHECK-NOT:                       Goto
+
+  public static boolean BooleanNot(boolean x) {
+    return !x;
+  }
+
+  /*
+   * Program which only delegates the condition, i.e. returns 1 when True
+   * and 0 when False.
+   */
+
+  // CHECK-START: boolean Main.GreaterThan(int, int) boolean_simplifier (before)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[Cond:z\d+]]     GreaterThan [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       If [ [[Cond]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const0]] [[Const1]] ]
+  // CHECK-DAG:                       Return [ [[Phi]] ]
+
+  // CHECK-START: boolean Main.GreaterThan(int, int) boolean_simplifier (after)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[Cond:z\d+]]     GreaterThan [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       Return [ [[Cond]] ]
+
+  public static boolean GreaterThan(int x, int y) {
+    return (x <= y) ? false : true;
+  }
+
+  /*
+   * Program which negates a condition, i.e. returns 0 when True
+   * and 1 when False.
+   */
+
+  // CHECK-START: boolean Main.LessThan(int, int) boolean_simplifier (before)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[Cond:z\d+]]     GreaterThanOrEqual [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       If [ [[Cond]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const1]] [[Const0]] ]
+  // CHECK-DAG:                       Return [ [[Phi]] ]
+
+  // CHECK-START: boolean Main.LessThan(int, int) boolean_simplifier (after)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[Cond:z\d+]]     LessThan [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       Return [ [[Cond]] ]
+
+  public static boolean LessThan(int x, int y) {
+    return x < y;
+  }
+
+  /*
+   * Program which further uses negated conditions.
+   * Note that Phis are discovered retrospectively.
+   */
+
+  // CHECK-START: boolean Main.ValuesOrdered(int, int, int) boolean_simplifier (before)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamZ:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[CondXY:z\d+]]   GreaterThan [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       If [ [[CondXY]] ]
+  // CHECK-DAG:     [[CondYZ:z\d+]]   GreaterThan [ [[ParamY]] [[ParamZ]] ]
+  // CHECK-DAG:                       If [ [[CondYZ]] ]
+  // CHECK-DAG:     [[CondXYZ:z\d+]]  NotEqual [ [[PhiXY:i\d+]] [[PhiYZ:i\d+]] ]
+  // CHECK-DAG:                       If [ [[CondXYZ]] ]
+  // CHECK-DAG:                       Return [ [[PhiXYZ:i\d+]] ]
+  // CHECK-DAG:     [[PhiXY]]         Phi [ [[Const1]] [[Const0]] ]
+  // CHECK-DAG:     [[PhiYZ]]         Phi [ [[Const1]] [[Const0]] ]
+  // CHECK-DAG:     [[PhiXYZ]]        Phi [ [[Const1]] [[Const0]] ]
+
+  // CHECK-START: boolean Main.ValuesOrdered(int, int, int) boolean_simplifier (after)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamZ:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[CmpXY:z\d+]]    LessThanOrEqual [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:     [[CmpYZ:z\d+]]    LessThanOrEqual [ [[ParamY]] [[ParamZ]] ]
+  // CHECK-DAG:     [[CmpXYZ:z\d+]]   Equal [ [[CmpXY]] [[CmpYZ]] ]
+  // CHECK-DAG:                       Return [ [[CmpXYZ]] ]
+
+  public static boolean ValuesOrdered(int x, int y, int z) {
+    return (x <= y) == (y <= z);
+  }
+
+  public static void main(String[] args) {
+    assertBoolEquals(false, BooleanNot(true));
+    assertBoolEquals(true, BooleanNot(false));
+    assertBoolEquals(true, GreaterThan(10, 5));
+    assertBoolEquals(false, GreaterThan(10, 10));
+    assertBoolEquals(false, GreaterThan(5, 10));
+    assertBoolEquals(true, LessThan(5, 10));
+    assertBoolEquals(false, LessThan(10, 10));
+    assertBoolEquals(false, LessThan(10, 5));
+    assertBoolEquals(true, ValuesOrdered(1, 3, 5));
+    assertBoolEquals(true, ValuesOrdered(5, 3, 1));
+    assertBoolEquals(false, ValuesOrdered(1, 3, 2));
+    assertBoolEquals(false, ValuesOrdered(2, 3, 1));
+    assertBoolEquals(true, ValuesOrdered(3, 3, 3));
+    assertBoolEquals(true, ValuesOrdered(3, 3, 5));
+    assertBoolEquals(false, ValuesOrdered(5, 5, 3));
+  }
+}
diff --git a/test/etc/default-build b/test/etc/default-build
index 928de57..58c9564 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -39,7 +39,7 @@
 fi
 
 mkdir classes
-${JAVAC} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'`
+${JAVAC} -d classes `find src -name '*.java'`
 
 if [ -d src2 ]; then
   ${JAVAC} -d classes `find src2 -name '*.java'`
@@ -75,14 +75,3 @@
 if [ ${NEED_DEX} = "true" ]; then
   zip $TEST_NAME.jar classes.dex
 fi
-
-# Create a single jar with two dex files for multidex.
-if [ -d src-multidex ]; then
-  mkdir classes2
-  ${JAVAC} -implicit:none -classpath src -d classes2 `find src-multidex -name '*.java'`
-  if [ ${NEED_DEX} = "true" ]; then
-    ${DX} -JXmx256m --debug --dex --dump-to=classes2.lst --output=classes2.dex \
-      --dump-width=1000 ${DX_FLAGS} classes2
-    zip $TEST_NAME.jar classes.dex classes2.dex
-  fi
-fi