Diffstat (limited to 'compiler')
43 files changed, 1728 insertions, 723 deletions
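Much of this diff reworks the ARM and ARM VIXL code generators so that the GenerateTest/GenerateLongTest helpers return both a condition code and its opposite as a std::pair, rather than a single Condition, which lets callers emit IT blocks and fall-through branches without recomputing the inverse. The following is a minimal standalone sketch of that pattern only; the enum values and the Opposite/MakeTest names are simplified stand-ins, not the actual ART types or helpers.

```cpp
#include <cstdio>
#include <utility>

// Simplified stand-ins for the ARM condition codes used by the backend.
enum Condition { EQ, NE, LT, GE, LE, GT };

// Illustrative inverse mapping for the subset of codes above.
static Condition Opposite(Condition cond) {
  switch (cond) {
    case EQ: return NE;
    case NE: return EQ;
    case LT: return GE;
    case GE: return LT;
    case LE: return GT;
    case GT: return LE;
  }
  return NE;  // unreachable for the values above
}

// Mirrors the new helper contract: compute the condition to branch on and its
// opposite in one step, swapping them when the caller asks for the inverted
// test (e.g. when the only explicit branch target is the false target).
static std::pair<Condition, Condition> MakeTest(Condition cond, bool invert) {
  Condition opposite = Opposite(cond);
  if (invert) {
    std::swap(cond, opposite);
  }
  return std::make_pair(cond, opposite);
}

int main() {
  const std::pair<Condition, Condition> test = MakeTest(LT, /* invert */ false);
  std::printf("branch on %d, opposite %d\n", test.first, test.second);
  return 0;
}
```

A caller can then branch on the first member and use the second for the opposite IT slot or the final fall-through, which is the shape HandleCondition and GenerateCompareTestAndBranch take in the hunks below.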
diff --git a/compiler/Android.bp b/compiler/Android.bp index d57f301ff9..b444fffd56 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -391,6 +391,7 @@ art_cc_test { mips64: { srcs: [ "linker/mips64/relative_patcher_mips64_test.cc", + "utils/mips64/managed_register_mips64_test.cc", ], }, x86: { diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index 808e28c9ea..538fe93793 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -70,10 +70,6 @@ class DexCompiler { return *unit_.GetDexFile(); } - bool PerformOptimizations() const { - return dex_to_dex_compilation_level_ >= DexToDexCompilationLevel::kOptimize; - } - // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where // a barrier is required. void CompileReturnVoid(Instruction* inst, uint32_t dex_pc); @@ -114,7 +110,7 @@ class DexCompiler { }; void DexCompiler::Compile() { - DCHECK_GE(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kRequired); + DCHECK_EQ(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kOptimize); const DexFile::CodeItem* code_item = unit_.GetCodeItem(); const uint16_t* insns = code_item->insns_; const uint32_t insns_size = code_item->insns_size_in_code_units_; @@ -221,7 +217,7 @@ void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) { } Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) { - if (!kEnableCheckCastEllision || !PerformOptimizations()) { + if (!kEnableCheckCastEllision) { return inst; } if (!driver_.IsSafeCast(&unit_, dex_pc)) { @@ -254,7 +250,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc, Instruction::Code new_opcode, bool is_put) { - if (!kEnableQuickening || !PerformOptimizations()) { + if (!kEnableQuickening) { return; } uint32_t field_idx = inst->VRegC_22c(); @@ -279,7 +275,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, Instruction::Code new_opcode, bool is_range) { - if (!kEnableQuickening || !PerformOptimizations()) { + if (!kEnableQuickening) { return; } uint32_t method_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c(); diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h index 00c596d60e..87ddb395ad 100644 --- a/compiler/dex/dex_to_dex_compiler.h +++ b/compiler/dex/dex_to_dex_compiler.h @@ -34,8 +34,7 @@ namespace optimizer { enum class DexToDexCompilationLevel { kDontDexToDexCompile, // Only meaning wrt image time interpretation. - kRequired, // Dex-to-dex compilation required for correctness. - kOptimize // Perform required transformation and peep-hole optimizations. + kOptimize // Perform peep-hole optimizations. }; std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs); diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 995098799c..e823f67d3c 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -532,16 +532,13 @@ static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( if (driver.GetCompilerOptions().GetDebuggable()) { // We are debuggable so definitions of classes might be changed. We don't want to do any // optimizations that could break that. 
- max_level = optimizer::DexToDexCompilationLevel::kRequired; + max_level = optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; } if (klass->IsVerified()) { // Class is verified so we can enable DEX-to-DEX compilation for performance. return max_level; - } else if (klass->ShouldVerifyAtRuntime()) { - // Class verification has soft-failed. Anyway, ensure at least correctness. - return optimizer::DexToDexCompilationLevel::kRequired; } else { - // Class verification has failed: do not run DEX-to-DEX compilation. + // Class verification has failed: do not run DEX-to-DEX optimizations. return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; } } @@ -611,7 +608,7 @@ static void CompileMethod(Thread* self, dex_file, (verified_method != nullptr) ? dex_to_dex_compilation_level - : optimizer::DexToDexCompilationLevel::kRequired); + : optimizer::DexToDexCompilationLevel::kDontDexToDexCompile); } } else if ((access_flags & kAccNative) != 0) { // Are we extracting only and have support for generic JNI down calls? diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index 34ad1c5c08..a0c0a2acf6 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -27,7 +27,6 @@ CompilerOptions::CompilerOptions() small_method_threshold_(kDefaultSmallMethodThreshold), tiny_method_threshold_(kDefaultTinyMethodThreshold), num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold), - inline_depth_limit_(kUnsetInlineDepthLimit), inline_max_code_units_(kUnsetInlineMaxCodeUnits), no_inline_from_(nullptr), boot_image_(false), @@ -62,7 +61,6 @@ CompilerOptions::CompilerOptions(CompilerFilter::Filter compiler_filter, size_t small_method_threshold, size_t tiny_method_threshold, size_t num_dex_methods_threshold, - size_t inline_depth_limit, size_t inline_max_code_units, const std::vector<const DexFile*>* no_inline_from, double top_k_profile_threshold, @@ -86,7 +84,6 @@ CompilerOptions::CompilerOptions(CompilerFilter::Filter compiler_filter, small_method_threshold_(small_method_threshold), tiny_method_threshold_(tiny_method_threshold), num_dex_methods_threshold_(num_dex_methods_threshold), - inline_depth_limit_(inline_depth_limit), inline_max_code_units_(inline_max_code_units), no_inline_from_(no_inline_from), boot_image_(false), @@ -130,10 +127,6 @@ void CompilerOptions::ParseNumDexMethods(const StringPiece& option, UsageFn Usag ParseUintOption(option, "--num-dex-methods", &num_dex_methods_threshold_, Usage); } -void CompilerOptions::ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage) { - ParseUintOption(option, "--inline-depth-limit", &inline_depth_limit_, Usage); -} - void CompilerOptions::ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage) { ParseUintOption(option, "--inline-max-code-units", &inline_max_code_units_, Usage); } @@ -183,8 +176,6 @@ bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usa ParseTinyMethodMax(option, Usage); } else if (option.starts_with("--num-dex-methods=")) { ParseNumDexMethods(option, Usage); - } else if (option.starts_with("--inline-depth-limit=")) { - ParseInlineDepthLimit(option, Usage); } else if (option.starts_with("--inline-max-code-units=")) { ParseInlineMaxCodeUnits(option, Usage); } else if (option == "--generate-debug-info" || option == "-g") { diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index 2e3e55f6c6..2376fbf5f5 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ 
-46,15 +46,9 @@ class CompilerOptions FINAL { static constexpr double kDefaultTopKProfileThreshold = 90.0; static const bool kDefaultGenerateDebugInfo = false; static const bool kDefaultGenerateMiniDebugInfo = false; - static const size_t kDefaultInlineDepthLimit = 3; static const size_t kDefaultInlineMaxCodeUnits = 32; - static constexpr size_t kUnsetInlineDepthLimit = -1; static constexpr size_t kUnsetInlineMaxCodeUnits = -1; - // Default inlining settings when the space filter is used. - static constexpr size_t kSpaceFilterInlineDepthLimit = 3; - static constexpr size_t kSpaceFilterInlineMaxCodeUnits = 10; - CompilerOptions(); ~CompilerOptions(); @@ -64,7 +58,6 @@ class CompilerOptions FINAL { size_t small_method_threshold, size_t tiny_method_threshold, size_t num_dex_methods_threshold, - size_t inline_depth_limit, size_t inline_max_code_units, const std::vector<const DexFile*>* no_inline_from, double top_k_profile_threshold, @@ -155,13 +148,6 @@ class CompilerOptions FINAL { return num_dex_methods_threshold_; } - size_t GetInlineDepthLimit() const { - return inline_depth_limit_; - } - void SetInlineDepthLimit(size_t limit) { - inline_depth_limit_ = limit; - } - size_t GetInlineMaxCodeUnits() const { return inline_max_code_units_; } @@ -275,7 +261,6 @@ class CompilerOptions FINAL { void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage); void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage); void ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage); - void ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage); void ParseNumDexMethods(const StringPiece& option, UsageFn Usage); void ParseTinyMethodMax(const StringPiece& option, UsageFn Usage); void ParseSmallMethodMax(const StringPiece& option, UsageFn Usage); @@ -289,7 +274,6 @@ class CompilerOptions FINAL { size_t small_method_threshold_; size_t tiny_method_threshold_; size_t num_dex_methods_threshold_; - size_t inline_depth_limit_; size_t inline_max_code_units_; // Dex files from which we should not inline code. diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 7ee494a131..897d81993d 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -363,7 +363,6 @@ void ImageTest::Compile(ImageHeader::StorageMode storage_mode, } CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U); // Set inline filter values. 
- compiler_options_->SetInlineDepthLimit(CompilerOptions::kDefaultInlineDepthLimit); compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits); image_classes_.clear(); if (!extra_dex.empty()) { diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index 3ae7974038..ad951bcc3f 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -97,7 +97,6 @@ JitCompiler::JitCompiler() { CompilerOptions::kDefaultSmallMethodThreshold, CompilerOptions::kDefaultTinyMethodThreshold, CompilerOptions::kDefaultNumDexMethodsThreshold, - CompilerOptions::kDefaultInlineDepthLimit, CompilerOptions::kDefaultInlineMaxCodeUnits, /* no_inline_from */ nullptr, CompilerOptions::kDefaultTopKProfileThreshold, @@ -177,10 +176,6 @@ JitCompiler::JitCompiler() { jit_logger_.reset(new JitLogger()); jit_logger_->OpenLog(); } - - size_t inline_depth_limit = compiler_driver_->GetCompilerOptions().GetInlineDepthLimit(); - DCHECK_LT(thread_count * inline_depth_limit, std::numeric_limits<uint16_t>::max()) - << "ProfilingInfo's inline counter can potentially overflow"; } JitCompiler::~JitCompiler() { diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 2ee4db923a..476906a768 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -528,7 +528,8 @@ class BCEVisitor : public HGraphVisitor { has_dom_based_dynamic_bce_(false), initial_block_size_(graph->GetBlocks().size()), side_effects_(side_effects), - induction_range_(induction_analysis) {} + induction_range_(induction_analysis), + next_(nullptr) {} void VisitBasicBlock(HBasicBlock* block) OVERRIDE { DCHECK(!IsAddedBlock(block)); @@ -1618,8 +1619,8 @@ class BCEVisitor : public HGraphVisitor { void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) { HInstruction* suspend = loop->GetSuspendCheck(); block->InsertInstructionBefore(condition, block->GetLastInstruction()); - HDeoptimize* deoptimize = - new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc()); + HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( + GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, suspend->GetDexPc()); block->InsertInstructionBefore(deoptimize, block->GetLastInstruction()); if (suspend->HasEnvironment()) { deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( @@ -1631,8 +1632,8 @@ class BCEVisitor : public HGraphVisitor { void InsertDeoptInBlock(HBoundsCheck* bounds_check, HInstruction* condition) { HBasicBlock* block = bounds_check->GetBlock(); block->InsertInstructionBefore(condition, bounds_check); - HDeoptimize* deoptimize = - new (GetGraph()->GetArena()) HDeoptimize(condition, bounds_check->GetDexPc()); + HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( + GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, bounds_check->GetDexPc()); block->InsertInstructionBefore(deoptimize, bounds_check); deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment()); } diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc index fe423012ca..048073e37a 100644 --- a/compiler/optimizing/cha_guard_optimization.cc +++ b/compiler/optimizing/cha_guard_optimization.cc @@ -36,7 +36,8 @@ class CHAGuardVisitor : HGraphVisitor { : HGraphVisitor(graph), block_has_cha_guard_(GetGraph()->GetBlocks().size(), 0, - graph->GetArena()->Adapter(kArenaAllocCHA)) { + 
graph->GetArena()->Adapter(kArenaAllocCHA)), + instruction_iterator_(nullptr) { number_of_guards_to_visit_ = GetGraph()->GetNumberOfCHAGuards(); DCHECK_NE(number_of_guards_to_visit_, 0u); // Will recount number of guards during guard optimization. @@ -201,8 +202,8 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag, HInstruction* suspend = loop_info->GetSuspendCheck(); // Need a new deoptimize instruction that copies the environment // of the suspend instruction for the loop. - HDeoptimize* deoptimize = - new (GetGraph()->GetArena()) HDeoptimize(compare, suspend->GetDexPc()); + HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( + GetGraph()->GetArena(), compare, HDeoptimize::Kind::kInline, suspend->GetDexPc()); pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( suspend->GetEnvironment(), loop_info->GetHeader()); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index caea250ab6..d7cc577580 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1134,7 +1134,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCodeARM { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); // The read barrier instrumentation of object ArrayGet @@ -1602,14 +1602,20 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARM* codegen) { } } -static Condition GenerateLongTestConstant(HCondition* condition, - bool invert, - CodeGeneratorARM* codegen) { +static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* condition, + bool invert, + CodeGeneratorARM* codegen) { DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong); const LocationSummary* const locations = condition->GetLocations(); - IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition(); - Condition ret = EQ; + IfCondition cond = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + + if (invert) { + std::swap(cond, opposite); + } + + std::pair<Condition, Condition> ret; const Location left = locations->InAt(0); const Location right = locations->InAt(1); @@ -1629,22 +1635,26 @@ static Condition GenerateLongTestConstant(HCondition* condition, __ CmpConstant(left_high, High32Bits(value)); __ it(EQ); __ cmp(left_low, ShifterOperand(Low32Bits(value)), EQ); - ret = ARMUnsignedCondition(cond); + ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite)); break; case kCondLE: case kCondGT: // Trivially true or false. if (value == std::numeric_limits<int64_t>::max()) { __ cmp(left_low, ShifterOperand(left_low)); - ret = cond == kCondLE ? EQ : NE; + ret = cond == kCondLE ? 
std::make_pair(EQ, NE) : std::make_pair(NE, EQ); break; } if (cond == kCondLE) { + DCHECK_EQ(opposite, kCondGT); cond = kCondLT; + opposite = kCondGE; } else { DCHECK_EQ(cond, kCondGT); + DCHECK_EQ(opposite, kCondLE); cond = kCondGE; + opposite = kCondLT; } value++; @@ -1653,7 +1663,7 @@ static Condition GenerateLongTestConstant(HCondition* condition, case kCondLT: __ CmpConstant(left_low, Low32Bits(value)); __ sbcs(IP, left_high, ShifterOperand(High32Bits(value))); - ret = ARMCondition(cond); + ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); break; default: LOG(FATAL) << "Unreachable"; @@ -1663,14 +1673,20 @@ static Condition GenerateLongTestConstant(HCondition* condition, return ret; } -static Condition GenerateLongTest(HCondition* condition, - bool invert, - CodeGeneratorARM* codegen) { +static std::pair<Condition, Condition> GenerateLongTest(HCondition* condition, + bool invert, + CodeGeneratorARM* codegen) { DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong); const LocationSummary* const locations = condition->GetLocations(); - IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition(); - Condition ret = EQ; + IfCondition cond = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + + if (invert) { + std::swap(cond, opposite); + } + + std::pair<Condition, Condition> ret; Location left = locations->InAt(0); Location right = locations->InAt(1); @@ -1689,15 +1705,19 @@ static Condition GenerateLongTest(HCondition* condition, __ cmp(left.AsRegisterPairLow<Register>(), ShifterOperand(right.AsRegisterPairLow<Register>()), EQ); - ret = ARMUnsignedCondition(cond); + ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite)); break; case kCondLE: case kCondGT: if (cond == kCondLE) { + DCHECK_EQ(opposite, kCondGT); cond = kCondGE; + opposite = kCondLT; } else { DCHECK_EQ(cond, kCondGT); + DCHECK_EQ(opposite, kCondLE); cond = kCondLT; + opposite = kCondGE; } std::swap(left, right); @@ -1709,7 +1729,7 @@ static Condition GenerateLongTest(HCondition* condition, __ sbcs(IP, left.AsRegisterPairHigh<Register>(), ShifterOperand(right.AsRegisterPairHigh<Register>())); - ret = ARMCondition(cond); + ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); break; default: LOG(FATAL) << "Unreachable"; @@ -1719,90 +1739,83 @@ static Condition GenerateLongTest(HCondition* condition, return ret; } -static Condition GenerateTest(HInstruction* instruction, - Location loc, - bool invert, - CodeGeneratorARM* codegen) { - DCHECK(!instruction->IsConstant()); +static std::pair<Condition, Condition> GenerateTest(HCondition* condition, + bool invert, + CodeGeneratorARM* codegen) { + const LocationSummary* const locations = condition->GetLocations(); + const Primitive::Type type = condition->GetLeft()->GetType(); + IfCondition cond = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + std::pair<Condition, Condition> ret; + const Location right = locations->InAt(1); - Condition ret = invert ? EQ : NE; + if (invert) { + std::swap(cond, opposite); + } - if (IsBooleanValueOrMaterializedCondition(instruction)) { - __ CmpConstant(loc.AsRegister<Register>(), 0); + if (type == Primitive::kPrimLong) { + ret = locations->InAt(1).IsConstant() + ? 
GenerateLongTestConstant(condition, invert, codegen) + : GenerateLongTest(condition, invert, codegen); + } else if (Primitive::IsFloatingPointType(type)) { + GenerateVcmp(condition, codegen); + __ vmstat(); + ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()), + ARMFPCondition(opposite, condition->IsGtBias())); } else { - HCondition* const condition = instruction->AsCondition(); - const LocationSummary* const locations = condition->GetLocations(); - const Primitive::Type type = condition->GetLeft()->GetType(); - const IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition(); - const Location right = locations->InAt(1); - - if (type == Primitive::kPrimLong) { - ret = condition->GetLocations()->InAt(1).IsConstant() - ? GenerateLongTestConstant(condition, invert, codegen) - : GenerateLongTest(condition, invert, codegen); - } else if (Primitive::IsFloatingPointType(type)) { - GenerateVcmp(condition, codegen); - __ vmstat(); - ret = ARMFPCondition(cond, condition->IsGtBias()); - } else { - DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; - const Register left = locations->InAt(0).AsRegister<Register>(); + const Register left = locations->InAt(0).AsRegister<Register>(); - if (right.IsRegister()) { - __ cmp(left, ShifterOperand(right.AsRegister<Register>())); - } else { - DCHECK(right.IsConstant()); - __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); - } - - ret = ARMCondition(cond); + if (right.IsRegister()) { + __ cmp(left, ShifterOperand(right.AsRegister<Register>())); + } else { + DCHECK(right.IsConstant()); + __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); } + + ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); } return ret; } -static bool CanGenerateTest(HInstruction* condition, ArmAssembler* assembler) { - if (!IsBooleanValueOrMaterializedCondition(condition)) { - const HCondition* const cond = condition->AsCondition(); - - if (cond->GetLeft()->GetType() == Primitive::kPrimLong) { - const LocationSummary* const locations = cond->GetLocations(); - const IfCondition c = cond->GetCondition(); - - if (locations->InAt(1).IsConstant()) { - const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue(); - ShifterOperand so; - - if (c < kCondLT || c > kCondGE) { - // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, - // we check that the least significant half of the first input to be compared - // is in a low register (the other half is read outside an IT block), and - // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP - // encoding can be used. 
- if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) || - !IsUint<8>(Low32Bits(value))) { - return false; - } - } else if (c == kCondLE || c == kCondGT) { - if (value < std::numeric_limits<int64_t>::max() && - !assembler->ShifterOperandCanHold(kNoRegister, - kNoRegister, - SBC, - High32Bits(value + 1), - kCcSet, - &so)) { - return false; - } - } else if (!assembler->ShifterOperandCanHold(kNoRegister, - kNoRegister, - SBC, - High32Bits(value), - kCcSet, - &so)) { +static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) { + if (condition->GetLeft()->GetType() == Primitive::kPrimLong) { + const LocationSummary* const locations = condition->GetLocations(); + const IfCondition c = condition->GetCondition(); + + if (locations->InAt(1).IsConstant()) { + const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue(); + ShifterOperand so; + + if (c < kCondLT || c > kCondGE) { + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the least significant half of the first input to be compared + // is in a low register (the other half is read outside an IT block), and + // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP + // encoding can be used. + if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) || + !IsUint<8>(Low32Bits(value))) { + return false; + } + } else if (c == kCondLE || c == kCondGT) { + if (value < std::numeric_limits<int64_t>::max() && + !assembler->ShifterOperandCanHold(kNoRegister, + kNoRegister, + SBC, + High32Bits(value + 1), + kCcSet, + &so)) { return false; } + } else if (!assembler->ShifterOperandCanHold(kNoRegister, + kNoRegister, + SBC, + High32Bits(value), + kCcSet, + &so)) { + return false; } } } @@ -2415,13 +2428,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } -void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond, - Label* true_label, - Label* false_label ATTRIBUTE_UNUSED) { - __ vmstat(); // transfer FP status register to ARM APSR. - __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias())); -} - void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label) { @@ -2438,7 +2444,6 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, // Set the conditions for the test, remembering that == needs to be // decided using the low words. - // TODO: consider avoiding jumps with temporary and CMP low+SBC high switch (if_cond) { case kCondEQ: case kCondNE: @@ -2509,25 +2514,38 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition, Label* true_target_in, Label* false_target_in) { + if (CanGenerateTest(condition, codegen_->GetAssembler())) { + Label* non_fallthrough_target; + bool invert; + + if (true_target_in == nullptr) { + DCHECK(false_target_in != nullptr); + non_fallthrough_target = false_target_in; + invert = true; + } else { + non_fallthrough_target = true_target_in; + invert = false; + } + + const auto cond = GenerateTest(condition, invert, codegen_); + + __ b(non_fallthrough_target, cond.first); + + if (false_target_in != nullptr && false_target_in != non_fallthrough_target) { + __ b(false_target_in); + } + + return; + } + // Generated branching requires both targets to be explicit. 
If either of the // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. Label fallthrough_target; Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; - Primitive::Type type = condition->InputAt(0)->GetType(); - switch (type) { - case Primitive::kPrimLong: - GenerateLongComparesAndJumps(condition, true_target, false_target); - break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - GenerateVcmp(condition, codegen_); - GenerateFPJumps(condition, true_target, false_target); - break; - default: - LOG(FATAL) << "Unexpected compare type " << type; - } + DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong); + GenerateLongComparesAndJumps(condition, true_target, false_target); if (false_target != &fallthrough_target) { __ b(false_target); @@ -2729,7 +2747,8 @@ void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) { } if (!Primitive::IsFloatingPointType(type) && - CanGenerateTest(condition, codegen_->GetAssembler())) { + (IsBooleanValueOrMaterializedCondition(condition) || + CanGenerateTest(condition->AsCondition(), codegen_->GetAssembler()))) { bool invert = false; if (out.Equals(second)) { @@ -2753,7 +2772,14 @@ void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) { codegen_->MoveLocation(out, src.Equals(first) ? second : first, type); } - const Condition cond = GenerateTest(condition, locations->InAt(2), invert, codegen_); + std::pair<Condition, Condition> cond; + + if (IsBooleanValueOrMaterializedCondition(condition)) { + __ CmpConstant(locations->InAt(2).AsRegister<Register>(), 0); + cond = invert ? std::make_pair(EQ, NE) : std::make_pair(NE, EQ); + } else { + cond = GenerateTest(condition->AsCondition(), invert, codegen_); + } if (out.IsRegister()) { ShifterOperand operand; @@ -2765,8 +2791,8 @@ void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) { operand = ShifterOperand(src.AsRegister<Register>()); } - __ it(cond); - __ mov(out.AsRegister<Register>(), operand, cond); + __ it(cond.first); + __ mov(out.AsRegister<Register>(), operand, cond.first); } else { DCHECK(out.IsRegisterPair()); @@ -2784,10 +2810,10 @@ void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) { operand_low = ShifterOperand(src.AsRegisterPairLow<Register>()); } - __ it(cond); - __ mov(out.AsRegisterPairLow<Register>(), operand_low, cond); - __ it(cond); - __ mov(out.AsRegisterPairHigh<Register>(), operand_high, cond); + __ it(cond.first); + __ mov(out.AsRegisterPairLow<Register>(), operand_low, cond.first); + __ it(cond.first); + __ mov(out.AsRegisterPairHigh<Register>(), operand_high, cond.first); } return; @@ -2840,7 +2866,7 @@ void LocationsBuilderARM::HandleCondition(HCondition* cond) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); if (!cond->IsEmittedAtUseSite()) { - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } break; @@ -2867,51 +2893,44 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { return; } - LocationSummary* locations = cond->GetLocations(); - Location left = locations->InAt(0); - Location right = locations->InAt(1); - Register out = locations->Out().AsRegister<Register>(); - Label true_label, false_label; + const Register out = cond->GetLocations()->Out().AsRegister<Register>(); - switch 
(cond->InputAt(0)->GetType()) { - default: { - // Integer case. - if (right.IsRegister()) { - __ cmp(left.AsRegister<Register>(), ShifterOperand(right.AsRegister<Register>())); - } else { - DCHECK(right.IsConstant()); - __ CmpConstant(left.AsRegister<Register>(), - CodeGenerator::GetInt32ValueOf(right.GetConstant())); - } - __ it(ARMCondition(cond->GetCondition()), kItElse); - __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1), - ARMCondition(cond->GetCondition())); - __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(0), - ARMCondition(cond->GetOppositeCondition())); - return; - } - case Primitive::kPrimLong: - GenerateLongComparesAndJumps(cond, &true_label, &false_label); - break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - GenerateVcmp(cond, codegen_); - GenerateFPJumps(cond, &true_label, &false_label); - break; + if (ArmAssembler::IsLowRegister(out) && CanGenerateTest(cond, codegen_->GetAssembler())) { + const auto condition = GenerateTest(cond, false, codegen_); + + __ it(condition.first); + __ mov(out, ShifterOperand(1), condition.first); + __ it(condition.second); + __ mov(out, ShifterOperand(0), condition.second); + return; } // Convert the jumps into the result. Label done_label; - Label* final_label = codegen_->GetFinalLabel(cond, &done_label); + Label* const final_label = codegen_->GetFinalLabel(cond, &done_label); - // False case: result = 0. - __ Bind(&false_label); - __ LoadImmediate(out, 0); - __ b(final_label); + if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) { + Label true_label, false_label; - // True case: result = 1. - __ Bind(&true_label); - __ LoadImmediate(out, 1); + GenerateLongComparesAndJumps(cond, &true_label, &false_label); + + // False case: result = 0. + __ Bind(&false_label); + __ LoadImmediate(out, 0); + __ b(final_label); + + // True case: result = 1. + __ Bind(&true_label); + __ LoadImmediate(out, 1); + } else { + DCHECK(CanGenerateTest(cond, codegen_->GetAssembler())); + + const auto condition = GenerateTest(cond, false, codegen_); + + __ mov(out, ShifterOperand(0), AL, kCcKeep); + __ b(final_label, condition.second); + __ LoadImmediate(out, 1); + } if (done_label.IsLinked()) { __ Bind(&done_label); @@ -7039,14 +7058,16 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - Label done, zero; - Label* final_label = codegen_->GetFinalLabel(instruction, &done); + Label done; + Label* const final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARM* slow_path = nullptr; // Return 0 if `obj` is null. // avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, &zero); + DCHECK_NE(out, obj); + __ LoadImmediate(out, 0); + __ CompareAndBranchIfZero(obj, final_label); } switch (type_check_kind) { @@ -7058,11 +7079,23 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { class_offset, maybe_temp_loc, kCompilerReadBarrierOption); - __ cmp(out, ShifterOperand(cls)); // Classes must be equal for the instanceof to succeed. - __ b(&zero, NE); - __ LoadImmediate(out, 1); - __ b(final_label); + __ cmp(out, ShifterOperand(cls)); + // We speculatively set the result to false without changing the condition + // flags, which allows us to avoid some branching later. 
+ __ mov(out, ShifterOperand(0), AL, kCcKeep); + + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. + if (ArmAssembler::IsLowRegister(out)) { + __ it(EQ); + __ mov(out, ShifterOperand(1), EQ); + } else { + __ b(final_label, NE); + __ LoadImmediate(out, 1); + } + break; } @@ -7084,14 +7117,11 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { super_offset, maybe_temp_loc, kCompilerReadBarrierOption); - // If `out` is null, we use it for the result, and jump to `done`. + // If `out` is null, we use it for the result, and jump to the final label. __ CompareAndBranchIfZero(out, final_label); __ cmp(out, ShifterOperand(cls)); __ b(&loop, NE); __ LoadImmediate(out, 1); - if (zero.IsLinked()) { - __ b(final_label); - } break; } @@ -7114,14 +7144,32 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { super_offset, maybe_temp_loc, kCompilerReadBarrierOption); - __ CompareAndBranchIfNonZero(out, &loop); - // If `out` is null, we use it for the result, and jump to `done`. - __ b(final_label); - __ Bind(&success); - __ LoadImmediate(out, 1); - if (zero.IsLinked()) { + // This is essentially a null check, but it sets the condition flags to the + // proper value for the code that follows the loop, i.e. not `EQ`. + __ cmp(out, ShifterOperand(1)); + __ b(&loop, HS); + + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. + if (ArmAssembler::IsLowRegister(out)) { + // If `out` is null, we use it for the result, and the condition flags + // have already been set to `NE`, so the IT block that comes afterwards + // (and which handles the successful case) turns into a NOP (instead of + // overwriting `out`). + __ Bind(&success); + // There is only one branch to the `success` label (which is bound to this + // IT block), and it has the same condition, `EQ`, so in that case the MOV + // is executed. + __ it(EQ); + __ mov(out, ShifterOperand(1), EQ); + } else { + // If `out` is null, we use it for the result, and jump to the final label. __ b(final_label); + __ Bind(&success); + __ LoadImmediate(out, 1); } + break; } @@ -7144,14 +7192,28 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { component_offset, maybe_temp_loc, kCompilerReadBarrierOption); - // If `out` is null, we use it for the result, and jump to `done`. + // If `out` is null, we use it for the result, and jump to the final label. __ CompareAndBranchIfZero(out, final_label); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(out, &zero); - __ Bind(&exact_check); - __ LoadImmediate(out, 1); - __ b(final_label); + __ cmp(out, ShifterOperand(0)); + // We speculatively set the result to false without changing the condition + // flags, which allows us to avoid some branching later. + __ mov(out, ShifterOperand(0), AL, kCcKeep); + + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. 
+ if (ArmAssembler::IsLowRegister(out)) { + __ Bind(&exact_check); + __ it(EQ); + __ mov(out, ShifterOperand(1), EQ); + } else { + __ b(final_label, NE); + __ Bind(&exact_check); + __ LoadImmediate(out, 1); + } + break; } @@ -7171,9 +7233,6 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { codegen_->AddSlowPath(slow_path); __ b(slow_path->GetEntryLabel(), NE); __ LoadImmediate(out, 1); - if (zero.IsLinked()) { - __ b(final_label); - } break; } @@ -7202,18 +7261,10 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ b(slow_path->GetEntryLabel()); - if (zero.IsLinked()) { - __ b(final_label); - } break; } } - if (zero.IsLinked()) { - __ Bind(&zero); - __ LoadImmediate(out, 0); - } - if (done.IsLinked()) { __ Bind(&done); } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 59a7f7c048..86f2f21df7 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -299,7 +299,6 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, Label* false_target); - void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 28cc942dfb..d463830ff6 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1150,7 +1150,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); // The read barrier instrumentation of object ArrayGet @@ -3281,7 +3281,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); Primitive::Type type = instruction->GetResultType(); - DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); LocationSummary* locations = instruction->GetLocations(); Register out = OutputRegister(instruction); diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 2d2d8109a3..cce412b314 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -1175,7 +1175,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << 
instruction_->DebugName(); // The read barrier instrumentation of object ArrayGet @@ -1687,14 +1687,21 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codege } } -static vixl32::Condition GenerateLongTestConstant(HCondition* condition, - bool invert, - CodeGeneratorARMVIXL* codegen) { +static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( + HCondition* condition, + bool invert, + CodeGeneratorARMVIXL* codegen) { DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong); const LocationSummary* const locations = condition->GetLocations(); - IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition(); - vixl32::Condition ret = eq; + IfCondition cond = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + + if (invert) { + std::swap(cond, opposite); + } + + std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne); const Location left = locations->InAt(0); const Location right = locations->InAt(1); @@ -1713,13 +1720,14 @@ static vixl32::Condition GenerateLongTestConstant(HCondition* condition, case kCondAE: { __ Cmp(left_high, High32Bits(value)); + // We use the scope because of the IT block that follows. ExactAssemblyScope guard(codegen->GetVIXLAssembler(), 2 * vixl32::k16BitT32InstructionSizeInBytes, CodeBufferCheckScope::kExactSize); __ it(eq); __ cmp(eq, left_low, Low32Bits(value)); - ret = ARMUnsignedCondition(cond); + ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite)); break; } case kCondLE: @@ -1727,15 +1735,19 @@ static vixl32::Condition GenerateLongTestConstant(HCondition* condition, // Trivially true or false. if (value == std::numeric_limits<int64_t>::max()) { __ Cmp(left_low, left_low); - ret = cond == kCondLE ? eq : ne; + ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq); break; } if (cond == kCondLE) { + DCHECK_EQ(opposite, kCondGT); cond = kCondLT; + opposite = kCondGE; } else { DCHECK_EQ(cond, kCondGT); + DCHECK_EQ(opposite, kCondLE); cond = kCondGE; + opposite = kCondLT; } value++; @@ -1746,7 +1758,7 @@ static vixl32::Condition GenerateLongTestConstant(HCondition* condition, __ Cmp(left_low, Low32Bits(value)); __ Sbcs(temps.Acquire(), left_high, High32Bits(value)); - ret = ARMCondition(cond); + ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); break; } default: @@ -1757,14 +1769,21 @@ static vixl32::Condition GenerateLongTestConstant(HCondition* condition, return ret; } -static vixl32::Condition GenerateLongTest(HCondition* condition, - bool invert, - CodeGeneratorARMVIXL* codegen) { +static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest( + HCondition* condition, + bool invert, + CodeGeneratorARMVIXL* codegen) { DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong); const LocationSummary* const locations = condition->GetLocations(); - IfCondition cond = invert ? 
condition->GetOppositeCondition() : condition->GetCondition(); - vixl32::Condition ret = eq; + IfCondition cond = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + + if (invert) { + std::swap(cond, opposite); + } + + std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne); Location left = locations->InAt(0); Location right = locations->InAt(1); @@ -1779,22 +1798,27 @@ static vixl32::Condition GenerateLongTest(HCondition* condition, case kCondAE: { __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); + // We use the scope because of the IT block that follows. ExactAssemblyScope guard(codegen->GetVIXLAssembler(), 2 * vixl32::k16BitT32InstructionSizeInBytes, CodeBufferCheckScope::kExactSize); __ it(eq); __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right)); - ret = ARMUnsignedCondition(cond); + ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite)); break; } case kCondLE: case kCondGT: if (cond == kCondLE) { + DCHECK_EQ(opposite, kCondGT); cond = kCondGE; + opposite = kCondLT; } else { DCHECK_EQ(cond, kCondGT); + DCHECK_EQ(opposite, kCondLE); cond = kCondLT; + opposite = kCondGE; } std::swap(left, right); @@ -1805,7 +1829,7 @@ static vixl32::Condition GenerateLongTest(HCondition* condition, __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right)); - ret = ARMCondition(cond); + ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); break; } default: @@ -1816,69 +1840,62 @@ static vixl32::Condition GenerateLongTest(HCondition* condition, return ret; } -static vixl32::Condition GenerateTest(HInstruction* instruction, - Location loc, - bool invert, - CodeGeneratorARMVIXL* codegen) { - DCHECK(!instruction->IsConstant()); +static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition, + bool invert, + CodeGeneratorARMVIXL* codegen) { + const Primitive::Type type = condition->GetLeft()->GetType(); + IfCondition cond = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne); - vixl32::Condition ret = invert ? eq : ne; + if (invert) { + std::swap(cond, opposite); + } - if (IsBooleanValueOrMaterializedCondition(instruction)) { - __ Cmp(RegisterFrom(loc), 0); + if (type == Primitive::kPrimLong) { + ret = condition->GetLocations()->InAt(1).IsConstant() + ? GenerateLongTestConstant(condition, invert, codegen) + : GenerateLongTest(condition, invert, codegen); + } else if (Primitive::IsFloatingPointType(type)) { + GenerateVcmp(condition, codegen); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()), + ARMFPCondition(opposite, condition->IsGtBias())); } else { - HCondition* const condition = instruction->AsCondition(); - const Primitive::Type type = condition->GetLeft()->GetType(); - const IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition(); - - if (type == Primitive::kPrimLong) { - ret = condition->GetLocations()->InAt(1).IsConstant() - ? 
GenerateLongTestConstant(condition, invert, codegen) - : GenerateLongTest(condition, invert, codegen); - } else if (Primitive::IsFloatingPointType(type)) { - GenerateVcmp(condition, codegen); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - ret = ARMFPCondition(cond, condition->IsGtBias()); - } else { - DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; - __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1)); - ret = ARMCondition(cond); - } + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; + __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1)); + ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); } return ret; } -static bool CanGenerateTest(HInstruction* condition, ArmVIXLAssembler* assembler) { - if (!IsBooleanValueOrMaterializedCondition(condition)) { - const HCondition* const cond = condition->AsCondition(); - - if (cond->GetLeft()->GetType() == Primitive::kPrimLong) { - const LocationSummary* const locations = cond->GetLocations(); - const IfCondition c = cond->GetCondition(); +static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) { + if (condition->GetLeft()->GetType() == Primitive::kPrimLong) { + const LocationSummary* const locations = condition->GetLocations(); + const IfCondition c = condition->GetCondition(); - if (locations->InAt(1).IsConstant()) { - const int64_t value = Int64ConstantFrom(locations->InAt(1)); + if (locations->InAt(1).IsConstant()) { + const int64_t value = Int64ConstantFrom(locations->InAt(1)); - if (c < kCondLT || c > kCondGE) { - // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, - // we check that the least significant half of the first input to be compared - // is in a low register (the other half is read outside an IT block), and - // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP - // encoding can be used. - if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) { - return false; - } - // TODO(VIXL): The rest of the checks are there to keep the backend in sync with - // the previous one, but are not strictly necessary. - } else if (c == kCondLE || c == kCondGT) { - if (value < std::numeric_limits<int64_t>::max() && - !assembler->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) { - return false; - } - } else if (!assembler->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) { + if (c < kCondLT || c > kCondGE) { + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the least significant half of the first input to be compared + // is in a low register (the other half is read outside an IT block), and + // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP + // encoding can be used. + if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) { + return false; + } + // TODO(VIXL): The rest of the checks are there to keep the backend in sync with + // the previous one, but are not strictly necessary. 
+ } else if (c == kCondLE || c == kCondGT) { + if (value < std::numeric_limits<int64_t>::max() && + !assembler->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) { return false; } + } else if (!assembler->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) { + return false; } } } @@ -2445,14 +2462,6 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) { void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } -void InstructionCodeGeneratorARMVIXL::GenerateFPJumps(HCondition* cond, - vixl32::Label* true_label, - vixl32::Label* false_label ATTRIBUTE_UNUSED) { - // To branch on the result of the FP compare we transfer FPSCR to APSR (encoded as PC in VMRS). - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(ARMFPCondition(cond->GetCondition(), cond->IsGtBias()), true_label); -} - void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond, vixl32::Label* true_label, vixl32::Label* false_label) { @@ -2469,7 +2478,6 @@ void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* c // Set the conditions for the test, remembering that == needs to be // decided using the low words. - // TODO: consider avoiding jumps with temporary and CMP low+SBC high switch (if_cond) { case kCondEQ: case kCondNE: @@ -2540,31 +2548,44 @@ void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* c void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition, vixl32::Label* true_target_in, vixl32::Label* false_target_in) { + if (CanGenerateTest(condition, codegen_->GetAssembler())) { + vixl32::Label* non_fallthrough_target; + bool invert; + + if (true_target_in == nullptr) { + DCHECK(false_target_in != nullptr); + non_fallthrough_target = false_target_in; + invert = true; + } else { + non_fallthrough_target = true_target_in; + invert = false; + } + + const auto cond = GenerateTest(condition, invert, codegen_); + + __ B(cond.first, non_fallthrough_target); + + if (false_target_in != nullptr && false_target_in != non_fallthrough_target) { + __ B(false_target_in); + } + + return; + } + // Generated branching requires both targets to be explicit. If either of the // targets is nullptr (fallthrough) use and bind `fallthrough` instead. vixl32::Label fallthrough; vixl32::Label* true_target = (true_target_in == nullptr) ? &fallthrough : true_target_in; vixl32::Label* false_target = (false_target_in == nullptr) ? 
&fallthrough : false_target_in; - Primitive::Type type = condition->InputAt(0)->GetType(); - switch (type) { - case Primitive::kPrimLong: - GenerateLongComparesAndJumps(condition, true_target, false_target); - break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - GenerateVcmp(condition, codegen_); - GenerateFPJumps(condition, true_target, false_target); - break; - default: - LOG(FATAL) << "Unexpected compare type " << type; - } + DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong); + GenerateLongComparesAndJumps(condition, true_target, false_target); if (false_target != &fallthrough) { __ B(false_target); } - if (true_target_in == nullptr || false_target_in == nullptr) { + if (fallthrough.IsReferenced()) { __ Bind(&fallthrough); } } @@ -2759,7 +2780,8 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { } if (!Primitive::IsFloatingPointType(type) && - CanGenerateTest(condition, codegen_->GetAssembler())) { + (IsBooleanValueOrMaterializedCondition(condition) || + CanGenerateTest(condition->AsCondition(), codegen_->GetAssembler()))) { bool invert = false; if (out.Equals(second)) { @@ -2783,15 +2805,24 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { codegen_->MoveLocation(out, src.Equals(first) ? second : first, type); } - const vixl32::Condition cond = GenerateTest(condition, locations->InAt(2), invert, codegen_); + std::pair<vixl32::Condition, vixl32::Condition> cond(eq, ne); + + if (IsBooleanValueOrMaterializedCondition(condition)) { + __ Cmp(InputRegisterAt(select, 2), 0); + cond = invert ? std::make_pair(eq, ne) : std::make_pair(ne, eq); + } else { + cond = GenerateTest(condition->AsCondition(), invert, codegen_); + } + const size_t instr_count = out.IsRegisterPair() ? 4 : 2; + // We use the scope because of the IT block that follows. ExactAssemblyScope guard(GetVIXLAssembler(), instr_count * vixl32::k16BitT32InstructionSizeInBytes, CodeBufferCheckScope::kExactSize); if (out.IsRegister()) { - __ it(cond); - __ mov(cond, RegisterFrom(out), OperandFrom(src, type)); + __ it(cond.first); + __ mov(cond.first, RegisterFrom(out), OperandFrom(src, type)); } else { DCHECK(out.IsRegisterPair()); @@ -2809,10 +2840,10 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { operand_low = LowRegisterFrom(src); } - __ it(cond); - __ mov(cond, LowRegisterFrom(out), operand_low); - __ it(cond); - __ mov(cond, HighRegisterFrom(out), operand_high); + __ it(cond.first); + __ mov(cond.first, LowRegisterFrom(out), operand_low); + __ it(cond.first); + __ mov(cond.first, HighRegisterFrom(out), operand_high); } return; @@ -2865,7 +2896,7 @@ void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); if (!cond->IsEmittedAtUseSite()) { - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } break; @@ -2892,50 +2923,48 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) { return; } - Location right = cond->GetLocations()->InAt(1); - vixl32::Register out = OutputRegister(cond); - vixl32::Label true_label, false_label; + const vixl32::Register out = OutputRegister(cond); - switch (cond->InputAt(0)->GetType()) { - default: { - // Integer case. 
- if (right.IsRegister()) { - __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1)); - } else { - DCHECK(right.IsConstant()); - __ Cmp(InputRegisterAt(cond, 0), - CodeGenerator::GetInt32ValueOf(right.GetConstant())); - } - ExactAssemblyScope aas(GetVIXLAssembler(), - 3 * vixl32::kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ ite(ARMCondition(cond->GetCondition())); - __ mov(ARMCondition(cond->GetCondition()), OutputRegister(cond), 1); - __ mov(ARMCondition(cond->GetOppositeCondition()), OutputRegister(cond), 0); - return; - } - case Primitive::kPrimLong: - GenerateLongComparesAndJumps(cond, &true_label, &false_label); - break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - GenerateVcmp(cond, codegen_); - GenerateFPJumps(cond, &true_label, &false_label); - break; + if (out.IsLow() && CanGenerateTest(cond, codegen_->GetAssembler())) { + const auto condition = GenerateTest(cond, false, codegen_); + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(GetVIXLAssembler(), + 4 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(condition.first); + __ mov(condition.first, out, 1); + __ it(condition.second); + __ mov(condition.second, out, 0); + return; } // Convert the jumps into the result. vixl32::Label done_label; - vixl32::Label* final_label = codegen_->GetFinalLabel(cond, &done_label); + vixl32::Label* const final_label = codegen_->GetFinalLabel(cond, &done_label); - // False case: result = 0. - __ Bind(&false_label); - __ Mov(out, 0); - __ B(final_label); + if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) { + vixl32::Label true_label, false_label; - // True case: result = 1. - __ Bind(&true_label); - __ Mov(out, 1); + GenerateLongComparesAndJumps(cond, &true_label, &false_label); + + // False case: result = 0. + __ Bind(&false_label); + __ Mov(out, 0); + __ B(final_label); + + // True case: result = 1. + __ Bind(&true_label); + __ Mov(out, 1); + } else { + DCHECK(CanGenerateTest(cond, codegen_->GetAssembler())); + + const auto condition = GenerateTest(cond, false, codegen_); + + __ Mov(LeaveFlags, out, 0); + __ B(condition.second, final_label, /* far_target */ false); + __ Mov(out, 1); + } if (done_label.IsReferenced()) { __ Bind(&done_label); @@ -7079,14 +7108,16 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - vixl32::Label done, zero; - vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); + vixl32::Label done; + vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARMVIXL* slow_path = nullptr; // Return 0 if `obj` is null. // avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, &zero, /* far_target */ false); + DCHECK(!out.Is(obj)); + __ Mov(out, 0); + __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false); } switch (type_check_kind) { @@ -7098,11 +7129,28 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) class_offset, maybe_temp_loc, kCompilerReadBarrierOption); - __ Cmp(out, cls); // Classes must be equal for the instanceof to succeed. 
- __ B(ne, &zero, /* far_target */ false); - __ Mov(out, 1); - __ B(final_label); + __ Cmp(out, cls); + // We speculatively set the result to false without changing the condition + // flags, which allows us to avoid some branching later. + __ Mov(LeaveFlags, out, 0); + + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. + if (out.IsLow()) { + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(eq); + __ mov(eq, out, 1); + } else { + __ B(ne, final_label, /* far_target */ false); + __ Mov(out, 1); + } + break; } @@ -7124,14 +7172,11 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) super_offset, maybe_temp_loc, kCompilerReadBarrierOption); - // If `out` is null, we use it for the result, and jump to `done`. + // If `out` is null, we use it for the result, and jump to the final label. __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); __ Cmp(out, cls); __ B(ne, &loop, /* far_target */ false); __ Mov(out, 1); - if (zero.IsReferenced()) { - __ B(final_label); - } break; } @@ -7154,14 +7199,38 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) super_offset, maybe_temp_loc, kCompilerReadBarrierOption); - __ CompareAndBranchIfNonZero(out, &loop); - // If `out` is null, we use it for the result, and jump to `done`. - __ B(final_label); - __ Bind(&success); - __ Mov(out, 1); - if (zero.IsReferenced()) { + // This is essentially a null check, but it sets the condition flags to the + // proper value for the code that follows the loop, i.e. not `eq`. + __ Cmp(out, 1); + __ B(hs, &loop, /* far_target */ false); + + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. + if (out.IsLow()) { + // If `out` is null, we use it for the result, and the condition flags + // have already been set to `ne`, so the IT block that comes afterwards + // (and which handles the successful case) turns into a NOP (instead of + // overwriting `out`). + __ Bind(&success); + + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + // There is only one branch to the `success` label (which is bound to this + // IT block), and it has the same condition, `eq`, so in that case the MOV + // is executed. + __ it(eq); + __ mov(eq, out, 1); + } else { + // If `out` is null, we use it for the result, and jump to the final label. __ B(final_label); + __ Bind(&success); + __ Mov(out, 1); } + break; } @@ -7184,14 +7253,34 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) component_offset, maybe_temp_loc, kCompilerReadBarrierOption); - // If `out` is null, we use it for the result, and jump to `done`. + // If `out` is null, we use it for the result, and jump to the final label. 
__ CompareAndBranchIfZero(out, final_label, /* far_target */ false); GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(out, &zero, /* far_target */ false); - __ Bind(&exact_check); - __ Mov(out, 1); - __ B(final_label); + __ Cmp(out, 0); + // We speculatively set the result to false without changing the condition + // flags, which allows us to avoid some branching later. + __ Mov(LeaveFlags, out, 0); + + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. + if (out.IsLow()) { + __ Bind(&exact_check); + + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(eq); + __ mov(eq, out, 1); + } else { + __ B(ne, final_label, /* far_target */ false); + __ Bind(&exact_check); + __ Mov(out, 1); + } + break; } @@ -7211,9 +7300,6 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) codegen_->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); - if (zero.IsReferenced()) { - __ B(final_label); - } break; } @@ -7242,18 +7328,10 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); - if (zero.IsReferenced()) { - __ B(final_label); - } break; } } - if (zero.IsReferenced()) { - __ Bind(&zero); - __ Mov(out, 0); - } - if (done.IsReferenced()) { __ Bind(&done); } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 781027ab30..1e9669dc38 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -401,9 +401,6 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateCompareTestAndBranch(HCondition* condition, vixl::aarch32::Label* true_target, vixl::aarch32::Label* false_target); - void GenerateFPJumps(HCondition* cond, - vixl::aarch32::Label* true_label, - vixl::aarch32::Label* false_label); void GenerateLongComparesAndJumps(HCondition* cond, vixl::aarch32::Label* true_label, vixl::aarch32::Label* false_label); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 5246dbc5cb..c82533bc7d 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -558,26 +558,21 @@ void CodeGeneratorMIPS64::GenerateFrameEntry() { return; } - // Make sure the frame size isn't unreasonably large. Per the various APIs - // it looks like it should always be less than 2GB in size, which allows - // us using 32-bit signed offsets from the stack pointer. - if (GetFrameSize() > 0x7FFFFFFF) - LOG(FATAL) << "Stack frame larger than 2GB"; + // Make sure the frame size isn't unreasonably large. + if (GetFrameSize() > GetStackOverflowReservedBytes(kMips64)) { + LOG(FATAL) << "Stack frame larger than " << GetStackOverflowReservedBytes(kMips64) << " bytes"; + } // Spill callee-saved registers. - // Note that their cumulative size is small and they can be indexed using - // 16-bit offsets. - - // TODO: increment/decrement SP in one step instead of two or remove this comment. 
- uint32_t ofs = FrameEntrySpillSize(); + uint32_t ofs = GetFrameSize(); __ IncreaseFrameSize(ofs); for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { GpuRegister reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { ofs -= kMips64DoublewordSize; - __ Sd(reg, SP, ofs); + __ StoreToOffset(kStoreDoubleword, reg, SP, ofs); __ cfi().RelOffset(DWARFReg(reg), ofs); } } @@ -586,23 +581,16 @@ void CodeGeneratorMIPS64::GenerateFrameEntry() { FpuRegister reg = kFpuCalleeSaves[i]; if (allocated_registers_.ContainsFloatingPointRegister(reg)) { ofs -= kMips64DoublewordSize; - __ Sdc1(reg, SP, ofs); + __ StoreFpuToOffset(kStoreDoubleword, reg, SP, ofs); __ cfi().RelOffset(DWARFReg(reg), ofs); } } - // Allocate the rest of the frame and store the current method pointer - // at its end. - - __ IncreaseFrameSize(GetFrameSize() - FrameEntrySpillSize()); - // Save the current method if we need it. Note that we do not // do this in HCurrentMethod, as the instruction might have been removed // in the SSA graph. if (RequiresCurrentMethod()) { - static_assert(IsInt<16>(kCurrentMethodStackOffset), - "kCurrentMethodStackOffset must fit into int16_t"); - __ Sd(kMethodRegisterArgument, SP, kCurrentMethodStackOffset); + __ StoreToOffset(kStoreDoubleword, kMethodRegisterArgument, SP, kCurrentMethodStackOffset); } if (GetGraph()->HasShouldDeoptimizeFlag()) { @@ -615,42 +603,32 @@ void CodeGeneratorMIPS64::GenerateFrameExit() { __ cfi().RememberState(); if (!HasEmptyFrame()) { - // Deallocate the rest of the frame. - - __ DecreaseFrameSize(GetFrameSize() - FrameEntrySpillSize()); - // Restore callee-saved registers. - // Note that their cumulative size is small and they can be indexed using - // 16-bit offsets. - - // TODO: increment/decrement SP in one step instead of two or remove this comment. - uint32_t ofs = 0; - - for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { - FpuRegister reg = kFpuCalleeSaves[i]; - if (allocated_registers_.ContainsFloatingPointRegister(reg)) { - __ Ldc1(reg, SP, ofs); - ofs += kMips64DoublewordSize; + // For better instruction scheduling restore RA before other registers. 
+ uint32_t ofs = GetFrameSize(); + for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { + GpuRegister reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + ofs -= kMips64DoublewordSize; + __ LoadFromOffset(kLoadDoubleword, reg, SP, ofs); __ cfi().Restore(DWARFReg(reg)); } } - for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { - GpuRegister reg = kCoreCalleeSaves[i]; - if (allocated_registers_.ContainsCoreRegister(reg)) { - __ Ld(reg, SP, ofs); - ofs += kMips64DoublewordSize; + for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { + FpuRegister reg = kFpuCalleeSaves[i]; + if (allocated_registers_.ContainsFloatingPointRegister(reg)) { + ofs -= kMips64DoublewordSize; + __ LoadFpuFromOffset(kLoadDoubleword, reg, SP, ofs); __ cfi().Restore(DWARFReg(reg)); } } - DCHECK_EQ(ofs, FrameEntrySpillSize()); - __ DecreaseFrameSize(ofs); + __ DecreaseFrameSize(GetFrameSize()); } - __ Jr(RA); - __ Nop(); + __ Jic(RA, 0); __ cfi().RestoreState(); __ cfi().DefCFAOffset(GetFrameSize()); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 4db4796985..80776e8b78 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -723,7 +723,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 2ffc398287..49f099f6a9 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -744,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); @@ -3660,7 +3660,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); Primitive::Type type = instruction->GetResultType(); - DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); bool is_div = instruction->IsDiv(); LocationSummary* locations = instruction->GetLocations(); diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index cd954043f5..31cd204c9f 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -74,7 +74,6 @@ class CodegenTargetConfig { } private: - CodegenTargetConfig() {} InstructionSet isa_; CreateCodegenFn create_codegen_; }; diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index e184745520..01304ac35b 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -66,6 +66,11 @@ inline 
vixl::aarch32::SRegister LowSRegisterFrom(Location location) { return vixl::aarch32::SRegister(location.AsFpuRegisterPairLow<vixl::aarch32::SRegister>()); } +inline vixl::aarch32::SRegister HighSRegisterFrom(Location location) { + DCHECK(location.IsFpuRegisterPair()) << location; + return vixl::aarch32::SRegister(location.AsFpuRegisterPairHigh<vixl::aarch32::SRegister>()); +} + inline vixl::aarch32::Register RegisterFrom(Location location) { DCHECK(location.IsRegister()) << location; return vixl::aarch32::Register(location.reg()); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 0dfae11465..cc3c143b15 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -505,6 +505,10 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit"); } + void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE { + StartAttributeStream("kind") << deoptimize->GetKind(); + } + #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE { StartAttributeStream("kind") << instruction->GetOpKind(); diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc index 82ee93d5c2..9516ccb385 100644 --- a/compiler/optimizing/induction_var_analysis_test.cc +++ b/compiler/optimizing/induction_var_analysis_test.cc @@ -29,7 +29,21 @@ namespace art { */ class InductionVarAnalysisTest : public CommonCompilerTest { public: - InductionVarAnalysisTest() : pool_(), allocator_(&pool_) { + InductionVarAnalysisTest() + : pool_(), + allocator_(&pool_), + iva_(nullptr), + entry_(nullptr), + return_(nullptr), + exit_(nullptr), + parameter_(nullptr), + constant0_(nullptr), + constant1_(nullptr), + constant2_(nullptr), + constant7_(nullptr), + constant100_(nullptr), + constantm1_(nullptr), + float_constant0_(nullptr) { graph_ = CreateGraph(&allocator_); } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 62f5114e59..19f668dc1d 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -46,32 +46,100 @@ namespace art { -static constexpr size_t kMaximumNumberOfHInstructions = 32; +// Instruction limit to control memory. +static constexpr size_t kMaximumNumberOfTotalInstructions = 1024; + +// Maximum number of instructions for considering a method small, +// which we will always try to inline if the other non-instruction limits +// are not reached. +static constexpr size_t kMaximumNumberOfInstructionsForSmallMethod = 3; // Limit the number of dex registers that we accumulate while inlining // to avoid creating large amount of nested environments. static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64; -// Avoid inlining within a huge method due to memory pressure. -static constexpr size_t kMaximumCodeUnitSize = 4096; +// Limit recursive call inlining, which do not benefit from too +// much inlining compared to code locality. +static constexpr size_t kMaximumNumberOfRecursiveCalls = 4; // Controls the use of inline caches in AOT mode. 
static constexpr bool kUseAOTInlineCaches = false; -void HInliner::Run() { - const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions(); - if ((compiler_options.GetInlineDepthLimit() == 0) - || (compiler_options.GetInlineMaxCodeUnits() == 0)) { - return; +// We check for line numbers to make sure the DepthString implementation +// aligns the output nicely. +#define LOG_INTERNAL(msg) \ + static_assert(__LINE__ > 10, "Unhandled line number"); \ + static_assert(__LINE__ < 10000, "Unhandled line number"); \ + VLOG(compiler) << DepthString(__LINE__) << msg + +#define LOG_TRY() LOG_INTERNAL("Try inlinining call: ") +#define LOG_NOTE() LOG_INTERNAL("Note: ") +#define LOG_SUCCESS() LOG_INTERNAL("Success: ") +#define LOG_FAIL(stat) MaybeRecordStat(stat); LOG_INTERNAL("Fail: ") +#define LOG_FAIL_NO_STAT() LOG_INTERNAL("Fail: ") + +std::string HInliner::DepthString(int line) const { + std::string value; + // Indent according to the inlining depth. + size_t count = depth_; + // Line numbers get printed in the log, so add a space if the log's line is less + // than 1000, and two if less than 100. 10 cannot be reached as it's the copyright. + if (!kIsTargetBuild) { + if (line < 100) { + value += " "; + } + if (line < 1000) { + value += " "; + } + // Safeguard if this file reaches more than 10000 lines. + DCHECK_LT(line, 10000); } - if (caller_compilation_unit_.GetCodeItem()->insns_size_in_code_units_ > kMaximumCodeUnitSize) { - return; + for (size_t i = 0; i < count; ++i) { + value += " "; + } + return value; +} + +static size_t CountNumberOfInstructions(HGraph* graph) { + size_t number_of_instructions = 0; + for (HBasicBlock* block : graph->GetReversePostOrderSkipEntryBlock()) { + for (HInstructionIterator instr_it(block->GetInstructions()); + !instr_it.Done(); + instr_it.Advance()) { + ++number_of_instructions; + } + } + return number_of_instructions; +} + +void HInliner::UpdateInliningBudget() { + if (total_number_of_instructions_ >= kMaximumNumberOfTotalInstructions) { + // Always try to inline small methods. + inlining_budget_ = kMaximumNumberOfInstructionsForSmallMethod; + } else { + inlining_budget_ = std::max( + kMaximumNumberOfInstructionsForSmallMethod, + kMaximumNumberOfTotalInstructions - total_number_of_instructions_); } +} + +void HInliner::Run() { if (graph_->IsDebuggable()) { // For simplicity, we currently never inline when the graph is debuggable. This avoids // doing some logic in the runtime to discover if a method could have been inlined. return; } + + // Initialize the number of instructions for the method being compiled. Recursive calls + // to HInliner::Run have already updated the instruction count. + if (outermost_graph_ == graph_) { + total_number_of_instructions_ = CountNumberOfInstructions(graph_); + } + + UpdateInliningBudget(); + DCHECK_NE(total_number_of_instructions_, 0u); + DCHECK_NE(inlining_budget_, 0u); + // Keep a copy of all blocks when starting the visit. 
ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder(); DCHECK(!blocks.empty()); @@ -292,7 +360,18 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) { return nullptr; } PointerSize pointer_size = caller_compilation_unit_.GetClassLinker()->GetImagePointerSize(); - return resolved_method->GetSingleImplementation(pointer_size); + ArtMethod* single_impl = resolved_method->GetSingleImplementation(pointer_size); + if (single_impl == nullptr) { + return nullptr; + } + if (single_impl->IsProxyMethod()) { + // Proxy method is a generic invoker that's not worth + // devirtualizing/inlining. It also causes issues when the proxy + // method is in another dex file if we try to rewrite invoke-interface to + // invoke-virtual because a proxy method doesn't have a real dex file. + return nullptr; + } + return single_impl; } bool HInliner::TryInline(HInvoke* invoke_instruction) { @@ -305,17 +384,18 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { ScopedObjectAccess soa(Thread::Current()); uint32_t method_index = invoke_instruction->GetDexMethodIndex(); const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); - VLOG(compiler) << "Try inlining " << caller_dex_file.PrettyMethod(method_index); + LOG_TRY() << caller_dex_file.PrettyMethod(method_index); - // We can query the dex cache directly. The verifier has populated it already. ArtMethod* resolved_method = invoke_instruction->GetResolvedMethod(); - ArtMethod* actual_method = nullptr; if (resolved_method == nullptr) { DCHECK(invoke_instruction->IsInvokeStaticOrDirect()); DCHECK(invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()); - VLOG(compiler) << "Not inlining a String.<init> method"; + LOG_FAIL_NO_STAT() << "Not inlining a String.<init> method"; return false; - } else if (invoke_instruction->IsInvokeStaticOrDirect()) { + } + ArtMethod* actual_method = nullptr; + + if (invoke_instruction->IsInvokeStaticOrDirect()) { actual_method = resolved_method; } else { // Check if we can statically find the method. 
@@ -328,6 +408,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { if (method != nullptr) { cha_devirtualize = true; actual_method = method; + LOG_NOTE() << "Try CHA-based inlining of " << actual_method->PrettyMethod(); } } @@ -390,16 +471,23 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file, : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache); switch (inline_cache_type) { - case kInlineCacheNoData: - break; + case kInlineCacheNoData: { + LOG_FAIL_NO_STAT() + << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " could not be statically determined"; + return false; + } - case kInlineCacheUninitialized: - VLOG(compiler) << "Interface or virtual call to " - << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) - << " is not hit and not inlined"; + case kInlineCacheUninitialized: { + LOG_FAIL_NO_STAT() + << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " is not hit and not inlined"; return false; + } - case kInlineCacheMonomorphic: + case kInlineCacheMonomorphic: { MaybeRecordStat(kMonomorphicCall); if (outermost_graph_->IsCompilingOsr()) { // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the @@ -408,23 +496,29 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file, } else { return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache); } + } - case kInlineCachePolymorphic: + case kInlineCachePolymorphic: { MaybeRecordStat(kPolymorphicCall); return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache); + } - case kInlineCacheMegamorphic: - VLOG(compiler) << "Interface or virtual call to " - << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) - << " is megamorphic and not inlined"; + case kInlineCacheMegamorphic: { + LOG_FAIL_NO_STAT() + << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " is megamorphic and not inlined"; MaybeRecordStat(kMegamorphicCall); return false; + } - case kInlineCacheMissingTypes: - VLOG(compiler) << "Interface or virtual call to " - << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) - << " is missing types and not inlined"; + case kInlineCacheMissingTypes: { + LOG_FAIL_NO_STAT() + << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " is missing types and not inlined"; return false; + } } UNREACHABLE(); } @@ -587,9 +681,10 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, dex::TypeIndex class_index = FindClassIndexIn( GetMonomorphicType(classes), caller_compilation_unit_); if (!class_index.IsValid()) { - VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method) - << " from inline cache is not inlined because its class is not" - << " accessible to the caller"; + LOG_FAIL(kNotInlinedDexCache) + << "Call to " << ArtMethod::PrettyMethod(resolved_method) + << " from inline cache is not inlined because its class is not" + << " accessible to the caller"; return false; } @@ -603,6 +698,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForVirtual( resolved_method, pointer_size); } + LOG_NOTE() << "Try inline monomorphic call to " << resolved_method->PrettyMethod(); DCHECK(resolved_method != nullptr); 
HInstruction* receiver = invoke_instruction->InputAt(0); HInstruction* cursor = invoke_instruction->GetPrevious(); @@ -646,7 +742,8 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction, HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc); HInstruction* compare = new (graph_->GetArena()) HNotEqual( deopt_flag, graph_->GetIntConstant(0, dex_pc)); - HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(compare, dex_pc); + HInstruction* deopt = new (graph_->GetArena()) HDeoptimize( + graph_->GetArena(), compare, HDeoptimize::Kind::kInline, dex_pc); if (cursor != nullptr) { bb_cursor->InsertInstructionAfter(deopt_flag, cursor); @@ -710,9 +807,16 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, bb_cursor->InsertInstructionAfter(compare, load_class); if (with_deoptimization) { HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( - compare, invoke_instruction->GetDexPc()); + graph_->GetArena(), + compare, + receiver, + HDeoptimize::Kind::kInline, + invoke_instruction->GetDexPc()); bb_cursor->InsertInstructionAfter(deoptimize, compare); deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); + DCHECK_EQ(invoke_instruction->InputAt(0), receiver); + receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize); + deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo()); } return compare; } @@ -752,6 +856,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, dex::TypeIndex class_index = FindClassIndexIn(handle.Get(), caller_compilation_unit_); HInstruction* return_replacement = nullptr; + LOG_NOTE() << "Try inline polymorphic call to " << method->PrettyMethod(); if (!class_index.IsValid() || !TryBuildAndInline(invoke_instruction, method, @@ -761,8 +866,8 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, } else { one_target_inlined = true; - VLOG(compiler) << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method) - << " has inlined " << ArtMethod::PrettyMethod(method); + LOG_SUCCESS() << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method) + << " has inlined " << ArtMethod::PrettyMethod(method); // If we have inlined all targets before, and this receiver is the last seen, // we deoptimize instead of keeping the original invoke instruction. @@ -796,9 +901,10 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, } if (!one_target_inlined) { - VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method) - << " from inline cache is not inlined because none" - << " of its targets could be inlined"; + LOG_FAIL_NO_STAT() + << "Call to " << ArtMethod::PrettyMethod(resolved_method) + << " from inline cache is not inlined because none" + << " of its targets could be inlined"; return false; } @@ -932,9 +1038,6 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( actual_method = new_method; } else if (actual_method != new_method) { // Different methods, bailout. 
- VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method) - << " from inline cache is not inlined because it resolves" - << " to different methods"; return false; } } @@ -988,13 +1091,19 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction); } else { HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( - compare, invoke_instruction->GetDexPc()); + graph_->GetArena(), + compare, + receiver, + HDeoptimize::Kind::kInline, + invoke_instruction->GetDexPc()); bb_cursor->InsertInstructionAfter(deoptimize, compare); deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); if (return_replacement != nullptr) { invoke_instruction->ReplaceWith(return_replacement); } + receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize); invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction); + deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo()); } // Run type propagation to get the guard typed. @@ -1007,6 +1116,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( MaybeRecordStat(kInlinedPolymorphicCall); + LOG_SUCCESS() << "Inlined same polymorphic target " << actual_method->PrettyMethod(); return true; } @@ -1021,11 +1131,23 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) { if (invoke_instruction->IsInvokeInterface()) { + DCHECK(!method->IsProxyMethod()); // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always // better than an invoke-interface because: // 1) In the best case, the interface call has one more indirection (to fetch the IMT). // 2) We will not go to the conflict trampoline with an invoke-virtual. // TODO: Consider sharpening once it is not dependent on the compiler driver. + + if (method->IsDefault() && !method->IsCopied()) { + // Changing to invoke-virtual cannot be done on an original default method + // since it's not in any vtable. Devirtualization by exact type/inline-cache + // always uses a method in the iftable which is never an original default + // method. + // On the other hand, inlining an original default method by CHA is fine. 
+ DCHECK(cha_devirtualize); + return false; + } + const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); uint32_t dex_method_index = FindMethodIndexIn( method, caller_dex_file, invoke_instruction->GetDexMethodIndex()); @@ -1076,13 +1198,34 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, return true; } +size_t HInliner::CountRecursiveCallsOf(ArtMethod* method) const { + const HInliner* current = this; + size_t count = 0; + do { + if (current->graph_->GetArtMethod() == method) { + ++count; + } + current = current->parent_; + } while (current != nullptr); + return count; +} + bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* method, ReferenceTypeInfo receiver_type, HInstruction** return_replacement) { if (method->IsProxyMethod()) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " is not inlined because of unimplemented inline support for proxy methods."; + LOG_FAIL(kNotInlinedProxy) + << "Method " << method->PrettyMethod() + << " is not inlined because of unimplemented inline support for proxy methods."; + return false; + } + + if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) { + LOG_FAIL(kNotInlinedRecursiveBudget) + << "Method " + << method->PrettyMethod() + << " is not inlined because it has reached its recursive call budget."; return false; } @@ -1091,15 +1234,16 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, if (!compiler_driver_->MayInline(method->GetDexFile(), outer_compilation_unit_.GetDexFile())) { if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) { - VLOG(compiler) << "Successfully replaced pattern of invoke " - << method->PrettyMethod(); + LOG_SUCCESS() << "Successfully replaced pattern of invoke " + << method->PrettyMethod(); MaybeRecordStat(kReplacedInvokeWithSimplePattern); return true; } - VLOG(compiler) << "Won't inline " << method->PrettyMethod() << " in " - << outer_compilation_unit_.GetDexFile()->GetLocation() << " (" - << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from " - << method->GetDexFile()->GetLocation(); + LOG_FAIL(kNotInlinedWont) + << "Won't inline " << method->PrettyMethod() << " in " + << outer_compilation_unit_.GetDexFile()->GetLocation() << " (" + << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from " + << method->GetDexFile()->GetLocation(); return false; } @@ -1108,30 +1252,32 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, const DexFile::CodeItem* code_item = method->GetCodeItem(); if (code_item == nullptr) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " is not inlined because it is native"; + LOG_FAIL_NO_STAT() + << "Method " << method->PrettyMethod() << " is not inlined because it is native"; return false; } size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits(); if (code_item->insns_size_in_code_units_ > inline_max_code_units) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " is too big to inline: " - << code_item->insns_size_in_code_units_ - << " > " - << inline_max_code_units; + LOG_FAIL(kNotInlinedCodeItem) + << "Method " << method->PrettyMethod() + << " is not inlined because its code item is too big: " + << code_item->insns_size_in_code_units_ + << " > " + << inline_max_code_units; return false; } if (code_item->tries_size_ != 0) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " is not inlined because of try block"; + LOG_FAIL(kNotInlinedTryCatch) + << "Method " << 
method->PrettyMethod() << " is not inlined because of try block"; return false; } if (!method->IsCompilable()) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " has soft failures un-handled by the compiler, so it cannot be inlined"; + LOG_FAIL(kNotInlinedNotVerified) + << "Method " << method->PrettyMethod() + << " has soft failures un-handled by the compiler, so it cannot be inlined"; } if (!method->GetDeclaringClass()->IsVerified()) { @@ -1139,8 +1285,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, if (Runtime::Current()->UseJitCompilation() || !compiler_driver_->IsMethodVerifiedWithoutFailures( method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " couldn't be verified, so it cannot be inlined"; + LOG_FAIL(kNotInlinedNotVerified) + << "Method " << method->PrettyMethod() + << " couldn't be verified, so it cannot be inlined"; return false; } } @@ -1149,9 +1296,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) { // Case of a static method that cannot be inlined because it implicitly // requires an initialization check of its declaring class. - VLOG(compiler) << "Method " << method->PrettyMethod() - << " is not inlined because it is static and requires a clinit" - << " check that cannot be emitted due to Dex cache limitations"; + LOG_FAIL(kNotInlinedDexCache) << "Method " << method->PrettyMethod() + << " is not inlined because it is static and requires a clinit" + << " check that cannot be emitted due to Dex cache limitations"; return false; } @@ -1160,7 +1307,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - VLOG(compiler) << "Successfully inlined " << method->PrettyMethod(); + LOG_SUCCESS() << method->PrettyMethod(); MaybeRecordStat(kInlinedInvoke); return true; } @@ -1448,15 +1595,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, handles_); if (builder.BuildGraph() != kAnalysisSuccess) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be built, so cannot be inlined"; + LOG_FAIL(kNotInlinedCannotBuild) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be built, so cannot be inlined"; return false; } if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, compiler_driver_->GetInstructionSet())) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " cannot be inlined because of the register allocator"; + LOG_FAIL(kNotInlinedRegisterAllocator) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " cannot be inlined because of the register allocator"; return false; } @@ -1503,15 +1652,13 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, /* is_first_run */ false).Run(); } - size_t number_of_instructions_budget = kMaximumNumberOfHInstructions; - size_t number_of_inlined_instructions = - RunOptimizations(callee_graph, code_item, dex_compilation_unit); - number_of_instructions_budget += number_of_inlined_instructions; + RunOptimizations(callee_graph, code_item, dex_compilation_unit); HBasicBlock* exit_block = callee_graph->GetExitBlock(); if (exit_block == nullptr) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it has an infinite loop"; + LOG_FAIL(kNotInlinedInfiniteLoop) + << "Method " << 
callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it has an infinite loop"; return false; } @@ -1520,15 +1667,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, if (predecessor->GetLastInstruction()->IsThrow()) { if (invoke_instruction->GetBlock()->IsTryBlock()) { // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto. - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because one branch always throws and" - << " caller is in a try/catch block"; + LOG_FAIL(kNotInlinedTryCatch) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because one branch always throws and" + << " caller is in a try/catch block"; return false; } else if (graph_->GetExitBlock() == nullptr) { // TODO(ngeoffray): Support adding HExit in the caller graph. - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because one branch always throws and" - << " caller does not have an exit block"; + LOG_FAIL(kNotInlinedInfiniteLoop) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because one branch always throws and" + << " caller does not have an exit block"; return false; } else if (graph_->HasIrreducibleLoops()) { // TODO(ngeoffray): Support re-computing loop information to graphs with @@ -1544,32 +1693,31 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, } if (!has_one_return) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it always throws"; + LOG_FAIL(kNotInlinedAlwaysThrows) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it always throws"; return false; } size_t number_of_instructions = 0; - - bool can_inline_environment = - total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters; - // Skip the entry block, it does not contain instructions that prevent inlining. for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) { if (block->IsLoopHeader()) { if (block->GetLoopInformation()->IsIrreducible()) { // Don't inline methods with irreducible loops, they could prevent some // optimizations to run. - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it contains an irreducible loop"; + LOG_FAIL(kNotInlinedIrreducibleLoop) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it contains an irreducible loop"; return false; } if (!block->GetLoopInformation()->HasExitEdge()) { // Don't inline methods with loops without exit, since they cause the // loop information to be computed incorrectly when updating after // inlining. 
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it contains a loop with no exit"; + LOG_FAIL(kNotInlinedLoopWithoutExit) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it contains a loop with no exit"; return false; } } @@ -1577,34 +1725,39 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, for (HInstructionIterator instr_it(block->GetInstructions()); !instr_it.Done(); instr_it.Advance()) { - if (number_of_instructions++ == number_of_instructions_budget) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " is not inlined because its caller has reached" - << " its instruction budget limit."; + if (++number_of_instructions >= inlining_budget_) { + LOG_FAIL(kNotInlinedInstructionBudget) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " is not inlined because the outer method has reached" + << " its instruction budget limit."; return false; } HInstruction* current = instr_it.Current(); - if (!can_inline_environment && current->NeedsEnvironment()) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " is not inlined because its caller has reached" - << " its environment budget limit."; + if (current->NeedsEnvironment() && + (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters)) { + LOG_FAIL(kNotInlinedEnvironmentBudget) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " is not inlined because its caller has reached" + << " its environment budget limit."; return false; } if (current->NeedsEnvironment() && !CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(), resolved_method)) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because " << current->DebugName() - << " needs an environment, is in a different dex file" - << ", and cannot be encoded in the stack maps."; + LOG_FAIL(kNotInlinedStackMaps) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because " << current->DebugName() + << " needs an environment, is in a different dex file" + << ", and cannot be encoded in the stack maps."; return false; } if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because " << current->DebugName() - << " it is in a different dex file and requires access to the dex cache"; + LOG_FAIL(kNotInlinedDexCache) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because " << current->DebugName() + << " it is in a different dex file and requires access to the dex cache"; return false; } @@ -1613,21 +1766,24 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, current->IsUnresolvedStaticFieldSet() || current->IsUnresolvedInstanceFieldSet()) { // Entrypoint for unresolved fields does not handle inlined frames. 
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it is using an unresolved" - << " entrypoint"; + LOG_FAIL(kNotInlinedUnresolvedEntrypoint) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it is using an unresolved" + << " entrypoint"; return false; } } } - number_of_inlined_instructions_ += number_of_instructions; - DCHECK_EQ(caller_instruction_counter, graph_->GetCurrentInstructionId()) << "No instructions can be added to the outer graph while inner graph is being built"; + // Inline the callee graph inside the caller graph. const int32_t callee_instruction_counter = callee_graph->GetCurrentInstructionId(); graph_->SetCurrentInstructionId(callee_instruction_counter); *return_replacement = callee_graph->InlineInto(graph_, invoke_instruction); + // Update our budget for other inlining attempts in `caller_graph`. + total_number_of_instructions_ += number_of_instructions; + UpdateInliningBudget(); DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId()) << "No instructions can be added to the inner graph during inlining into the outer graph"; @@ -1640,9 +1796,9 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, return true; } -size_t HInliner::RunOptimizations(HGraph* callee_graph, - const DexFile::CodeItem* code_item, - const DexCompilationUnit& dex_compilation_unit) { +void HInliner::RunOptimizations(HGraph* callee_graph, + const DexFile::CodeItem* code_item, + const DexCompilationUnit& dex_compilation_unit) { // Note: if the outermost_graph_ is being compiled OSR, we should not run any // optimization that could lead to a HDeoptimize. The following optimizations do not. HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner"); @@ -1664,23 +1820,37 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph, optimization->Run(); } - size_t number_of_inlined_instructions = 0u; - if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) { - HInliner inliner(callee_graph, - outermost_graph_, - codegen_, - outer_compilation_unit_, - dex_compilation_unit, - compiler_driver_, - handles_, - inline_stats_, - total_number_of_dex_registers_ + code_item->registers_size_, - depth_ + 1); - inliner.Run(); - number_of_inlined_instructions += inliner.number_of_inlined_instructions_; + // Bail early for pathological cases on the environment (for example recursive calls, + // or too large environment). + if (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters) { + LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod() + << " will not be inlined because the outer method has reached" + << " its environment budget limit."; + return; + } + + // Bail early if we know we already are over the limit. + size_t number_of_instructions = CountNumberOfInstructions(callee_graph); + if (number_of_instructions > inlining_budget_) { + LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod() + << " will not be inlined because the outer method has reached" + << " its instruction budget limit. 
" << number_of_instructions; + return; } - return number_of_inlined_instructions; + HInliner inliner(callee_graph, + outermost_graph_, + codegen_, + outer_compilation_unit_, + dex_compilation_unit, + compiler_driver_, + handles_, + inline_stats_, + total_number_of_dex_registers_ + code_item->registers_size_, + total_number_of_instructions_ + number_of_instructions, + this, + depth_ + 1); + inliner.Run(); } static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti, diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index a032042c78..9e4685cbf4 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -42,7 +42,9 @@ class HInliner : public HOptimization { VariableSizedHandleScope* handles, OptimizingCompilerStats* stats, size_t total_number_of_dex_registers, - size_t depth) + size_t total_number_of_instructions, + HInliner* parent, + size_t depth = 0) : HOptimization(outer_graph, kInlinerPassName, stats), outermost_graph_(outermost_graph), outer_compilation_unit_(outer_compilation_unit), @@ -50,8 +52,10 @@ class HInliner : public HOptimization { codegen_(codegen), compiler_driver_(compiler_driver), total_number_of_dex_registers_(total_number_of_dex_registers), + total_number_of_instructions_(total_number_of_instructions), + parent_(parent), depth_(depth), - number_of_inlined_instructions_(0), + inlining_budget_(0), handles_(handles), inline_stats_(nullptr) {} @@ -95,10 +99,10 @@ class HInliner : public HOptimization { HInstruction** return_replacement); // Run simple optimizations on `callee_graph`. - // Returns the number of inlined instructions. - size_t RunOptimizations(HGraph* callee_graph, - const DexFile::CodeItem* code_item, - const DexCompilationUnit& dex_compilation_unit); + void RunOptimizations(HGraph* callee_graph, + const DexFile::CodeItem* code_item, + const DexCompilationUnit& dex_compilation_unit) + REQUIRES_SHARED(Locks::mutator_lock_); // Try to recognize known simple patterns and replace invoke call with appropriate instructions. bool TryPatternSubstitution(HInvoke* invoke_instruction, @@ -259,14 +263,30 @@ class HInliner : public HOptimization { HInstruction* return_replacement, HInstruction* invoke_instruction); + // Update the inlining budget based on `total_number_of_instructions_`. + void UpdateInliningBudget(); + + // Count the number of calls of `method` being inlined recursively. + size_t CountRecursiveCallsOf(ArtMethod* method) const; + + // Pretty-print for spaces during logging. + std::string DepthString(int line) const; + HGraph* const outermost_graph_; const DexCompilationUnit& outer_compilation_unit_; const DexCompilationUnit& caller_compilation_unit_; CodeGenerator* const codegen_; CompilerDriver* const compiler_driver_; const size_t total_number_of_dex_registers_; + size_t total_number_of_instructions_; + + // The 'parent' inliner, that means the inlinigng optimization that requested + // `graph_` to be inlined. + const HInliner* const parent_; const size_t depth_; - size_t number_of_inlined_instructions_; + + // The budget left for inlining, in number of instructions. + size_t inlining_budget_; VariableSizedHandleScope* const handles_; // Used to record stats about optimizations on the inlined graph. 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 17421fc364..60790e5b84 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -2132,6 +2132,9 @@ void InstructionSimplifierVisitor::VisitDeoptimize(HDeoptimize* deoptimize) { if (cond->IsConstant()) { if (cond->AsIntConstant()->IsFalse()) { // Never deopt: instruction can be removed. + if (deoptimize->GuardsAnInput()) { + deoptimize->ReplaceWith(deoptimize->GuardedInput()); + } deoptimize->GetBlock()->RemoveInstruction(deoptimize); } else { // Always deopt. diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index b25bad7170..0d933eaf82 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -39,6 +39,7 @@ using helpers::Int32ConstantFrom; using helpers::LocationFrom; using helpers::LowRegisterFrom; using helpers::LowSRegisterFrom; +using helpers::HighSRegisterFrom; using helpers::OutputDRegister; using helpers::OutputSRegister; using helpers::OutputRegister; @@ -794,6 +795,58 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) { __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); } +void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) { + if (features_.HasARMv8AInstructions()) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + } +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions()); + + ArmVIXLAssembler* assembler = GetAssembler(); + vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0); + vixl32::Register out_reg = OutputRegister(invoke); + vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0)); + vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0)); + vixl32::Label done; + vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done); + + // Round to nearest integer, ties away from zero. + __ Vcvta(S32, F32, temp1, in_reg); + __ Vmov(out_reg, temp1); + + // For positive, zero or NaN inputs, rounding is done. + __ Cmp(out_reg, 0); + __ B(ge, final_label, /* far_target */ false); + + // Handle input < 0 cases. + // If input is negative but not a tie, previous result (round to nearest) is valid. + // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1. + __ Vrinta(F32, F32, temp1, in_reg); + __ Vmov(temp2, 0.5); + __ Vsub(F32, temp1, in_reg, temp1); + __ Vcmp(F32, temp1, temp2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + { + // Use ExactAsemblyScope here because we are using IT. + ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(eq); + __ add(eq, out_reg, out_reg, 1); + } + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) { CreateIntToIntLocations(arena_, invoke); } @@ -3100,7 +3153,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { } UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? 
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit) diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc index 5bcfa4c98b..8d15f78cce 100644 --- a/compiler/optimizing/licm_test.cc +++ b/compiler/optimizing/licm_test.cc @@ -28,7 +28,18 @@ namespace art { */ class LICMTest : public CommonCompilerTest { public: - LICMTest() : pool_(), allocator_(&pool_) { + LICMTest() + : pool_(), + allocator_(&pool_), + entry_(nullptr), + loop_preheader_(nullptr), + loop_header_(nullptr), + loop_body_(nullptr), + return_(nullptr), + exit_(nullptr), + parameter_(nullptr), + int_constant_(nullptr), + float_constant_(nullptr) { graph_ = CreateGraph(&allocator_); } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index ec706e6694..caada8bccb 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1088,6 +1088,19 @@ void HInstruction::ReplaceWith(HInstruction* other) { DCHECK(env_uses_.empty()); } +void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) { + const HUseList<HInstruction*>& uses = GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HInstruction* user = it->GetUser(); + size_t index = it->GetIndex(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). + ++it; + if (dominator->StrictlyDominates(user)) { + user->ReplaceInput(replacement, index); + } + } +} + void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) { HUserRecord<HInstruction*> input_use = InputRecordAt(index); if (input_use.GetInstruction() == replacement) { @@ -1323,6 +1336,18 @@ std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs) { } } +std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs) { + switch (rhs) { + case HDeoptimize::Kind::kBCE: + return os << "bce"; + case HDeoptimize::Kind::kInline: + return os << "inline"; + default: + LOG(FATAL) << "Unknown Deoptimization kind: " << static_cast<int>(rhs); + UNREACHABLE(); + } +} + bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const { return this == instruction->GetPreviousDisregardingMoves(); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 6881d8f6ae..5f5a28c520 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -341,6 +341,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)), cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)), cached_current_method_(nullptr), + art_method_(nullptr), inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()), osr_(osr), cha_single_implementation_list_(arena->Adapter(kArenaAllocCHA)) { @@ -2080,6 +2081,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { void SetLocations(LocationSummary* locations) { locations_ = locations; } void ReplaceWith(HInstruction* instruction); + void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); void ReplaceInput(HInstruction* replacement, size_t index); // This is almost the same as doing `ReplaceWith()`. 
But in this helper, the @@ -2943,28 +2945,97 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> { }; // Deoptimize to interpreter, upon checking a condition. -class HDeoptimize FINAL : public HTemplateInstruction<1> { +class HDeoptimize FINAL : public HVariableInputSizeInstruction { public: + enum class Kind { + kBCE, + kInline, + kLast = kInline + }; + + // Use this constructor when the `HDeoptimize` acts as a barrier, where no code can move + // across. + HDeoptimize(ArenaAllocator* arena, HInstruction* cond, Kind kind, uint32_t dex_pc) + : HVariableInputSizeInstruction( + SideEffects::All(), + dex_pc, + arena, + /* number_of_inputs */ 1, + kArenaAllocMisc) { + SetPackedFlag<kFieldCanBeMoved>(false); + SetPackedField<DeoptimizeKindField>(kind); + SetRawInputAt(0, cond); + } + + // Use this constructor when the `HDeoptimize` guards an instruction, and any user + // that relies on the deoptimization to pass should have its input be the `HDeoptimize` + // instead of `guard`. // We set CanTriggerGC to prevent any intermediate address to be live // at the point of the `HDeoptimize`. - HDeoptimize(HInstruction* cond, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) { + HDeoptimize(ArenaAllocator* arena, + HInstruction* cond, + HInstruction* guard, + Kind kind, + uint32_t dex_pc) + : HVariableInputSizeInstruction( + SideEffects::CanTriggerGC(), + dex_pc, + arena, + /* number_of_inputs */ 2, + kArenaAllocMisc) { + SetPackedFlag<kFieldCanBeMoved>(true); + SetPackedField<DeoptimizeKindField>(kind); SetRawInputAt(0, cond); + SetRawInputAt(1, guard); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; + bool CanBeMoved() const OVERRIDE { return GetPackedFlag<kFieldCanBeMoved>(); } + + bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind()); } + bool NeedsEnvironment() const OVERRIDE { return true; } + bool CanThrow() const OVERRIDE { return true; } + Kind GetKind() const { return GetPackedField<DeoptimizeKindField>(); } + + Primitive::Type GetType() const OVERRIDE { + return GuardsAnInput() ? GuardedInput()->GetType() : Primitive::kPrimVoid; + } + + bool GuardsAnInput() const { + return InputCount() == 2; + } + + HInstruction* GuardedInput() const { + DCHECK(GuardsAnInput()); + return InputAt(1); + } + + void RemoveGuard() { + RemoveInputAt(1); + } + DECLARE_INSTRUCTION(Deoptimize); private: + static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits; + static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1; + static constexpr size_t kFieldDeoptimizeKindSize = + MinimumBitsToStore(static_cast<size_t>(Kind::kLast)); + static constexpr size_t kNumberOfDeoptimizePackedBits = + kFieldDeoptimizeKind + kFieldDeoptimizeKindSize; + static_assert(kNumberOfDeoptimizePackedBits <= kMaxNumberOfPackedBits, + "Too many packed fields."); + using DeoptimizeKindField = BitField<Kind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>; + DISALLOW_COPY_AND_ASSIGN(HDeoptimize); }; +std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs); + // Represents a should_deoptimize flag. Currently used for CHA-based devirtualization. // The compiled code checks this flag value in a guard before devirtualized call and // if it's true, starts to do deoptimization. 
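Note on the reworked HDeoptimize above: the one-input constructor keeps the old "barrier" behaviour (SideEffects::All(), not movable), while the two-input constructor lets the deoptimization guard a value, carry that value's type, and participate in code motion keyed on its Kind. The sketch below condenses how the inliner hunks earlier in this diff wire up the guarding form; the helper name is illustrative, and the usual ART optimizing-compiler headers are assumed.

    #include "nodes.h"  // assumed ART header providing HGraph, HDeoptimize, HInvoke, etc.

    // Illustrative condensation of the AddTypeGuard / same-target polymorphic paths above.
    static void GuardReceiverWithDeopt(HGraph* graph,
                                       HBasicBlock* bb_cursor,
                                       HInstruction* compare,
                                       HInstruction* receiver,
                                       HInvoke* invoke_instruction) {
      HDeoptimize* deoptimize = new (graph->GetArena()) HDeoptimize(
          graph->GetArena(),
          compare,                          // input 0: condition that triggers the deopt
          receiver,                         // input 1: the value being guarded
          HDeoptimize::Kind::kInline,
          invoke_instruction->GetDexPc());
      bb_cursor->InsertInstructionAfter(deoptimize, compare);
      deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
      // Users dominated by the guard now read the receiver through the HDeoptimize,
      // so GetType() and GuardedInput() let it stand in for the guarded value.
      receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
      deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
    }

This is also why the instruction_simplifier hunk above redirects uses before deleting: when the condition folds to false, a guarding HDeoptimize must first be replaced by GuardedInput() so its users fall back to the original value.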
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index d84fe6ccff..60af2b4201 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -174,53 +174,45 @@ static constexpr uint8_t expected_cfi_kMips[] = { // 0x00000034: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kMips64[] = { - 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, - 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7, - 0xE8, 0xFF, 0xBD, 0x67, 0x18, 0x00, 0xBD, 0x67, - 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, 0x10, 0x00, 0xB0, 0xDF, - 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, 0x28, 0x00, 0xBD, 0x67, - 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, + 0xC0, 0xFF, 0xBD, 0x67, 0x38, 0x00, 0xBF, 0xFF, 0x30, 0x00, 0xB1, 0xFF, + 0x28, 0x00, 0xB0, 0xFF, 0x20, 0x00, 0xB9, 0xF7, 0x18, 0x00, 0xB8, 0xF7, + 0x38, 0x00, 0xBF, 0xDF, 0x30, 0x00, 0xB1, 0xDF, 0x28, 0x00, 0xB0, 0xDF, + 0x20, 0x00, 0xB9, 0xD7, 0x18, 0x00, 0xB8, 0xD7, 0x40, 0x00, 0xBD, 0x67, + 0x00, 0x00, 0x1F, 0xD8, }; - static constexpr uint8_t expected_cfi_kMips64[] = { - 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, - 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x0A, 0x44, - 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0, 0x44, 0xD1, 0x44, 0xDF, - 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, + 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, + 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, + 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, }; -// 0x00000000: daddiu r29, r29, -40 -// 0x00000004: .cfi_def_cfa_offset: 40 -// 0x00000004: sd r31, +32(r29) +// 0x00000000: daddiu r29, r29, -64 +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: sd r31, +56(r29) // 0x00000008: .cfi_offset: r31 at cfa-8 -// 0x00000008: sd r17, +24(r29) +// 0x00000008: sd r17, +48(r29) // 0x0000000c: .cfi_offset: r17 at cfa-16 -// 0x0000000c: sd r16, +16(r29) +// 0x0000000c: sd r16, +40(r29) // 0x00000010: .cfi_offset: r16 at cfa-24 -// 0x00000010: sdc1 f25, +8(r29) +// 0x00000010: sdc1 f25, +32(r29) // 0x00000014: .cfi_offset: r57 at cfa-32 -// 0x00000014: sdc1 f24, +0(r29) +// 0x00000014: sdc1 f24, +24(r29) // 0x00000018: .cfi_offset: r56 at cfa-40 -// 0x00000018: daddiu r29, r29, -24 -// 0x0000001c: .cfi_def_cfa_offset: 64 -// 0x0000001c: .cfi_remember_state -// 0x0000001c: daddiu r29, r29, 24 -// 0x00000020: .cfi_def_cfa_offset: 40 -// 0x00000020: ldc1 f24, +0(r29) -// 0x00000024: .cfi_restore: r56 -// 0x00000024: ldc1 f25, +8(r29) +// 0x00000018: .cfi_remember_state +// 0x00000018: ld r31, +56(r29) +// 0x0000001c: .cfi_restore: r31 +// 0x0000001c: ld r17, +48(r29) +// 0x00000020: .cfi_restore: r17 +// 0x00000020: ld r16, +40(r29) +// 0x00000024: .cfi_restore: r16 +// 0x00000024: ldc1 f25, +32(r29) // 0x00000028: .cfi_restore: r57 -// 0x00000028: ld r16, +16(r29) -// 0x0000002c: .cfi_restore: r16 -// 0x0000002c: ld r17, +24(r29) -// 0x00000030: .cfi_restore: r17 -// 0x00000030: ld r31, +32(r29) -// 0x00000034: .cfi_restore: r31 -// 0x00000034: daddiu r29, r29, 40 -// 0x00000038: .cfi_def_cfa_offset: 0 -// 0x00000038: jr r31 -// 0x0000003c: nop -// 0x00000040: .cfi_restore_state -// 0x00000040: .cfi_def_cfa_offset: 64 +// 0x00000028: ldc1 f24, +24(r29) +// 0x0000002c: .cfi_restore: r56 +// 0x0000002c: daddiu r29, r29, 64 +// 0x00000030: .cfi_def_cfa_offset: 0 +// 0x00000030: jic 
r31, 0 +// 0x00000034: .cfi_restore_state +// 0x00000034: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kThumb2_adjust[] = { #ifdef ART_USE_OLD_ARM_BACKEND @@ -403,58 +395,52 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = { // 0x00020060: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kMips64_adjust_head[] = { - 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, - 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7, - 0xE8, 0xFF, 0xBD, 0x67, 0x02, 0x00, 0xA6, 0x60, - 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8, + 0xC0, 0xFF, 0xBD, 0x67, 0x38, 0x00, 0xBF, 0xFF, 0x30, 0x00, 0xB1, 0xFF, + 0x28, 0x00, 0xB0, 0xFF, 0x20, 0x00, 0xB9, 0xF7, 0x18, 0x00, 0xB8, 0xF7, + 0x02, 0x00, 0xA6, 0x60, 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8, }; static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = { - 0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, - 0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, - 0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x38, 0x00, 0xBF, 0xDF, 0x30, 0x00, 0xB1, 0xDF, 0x28, 0x00, 0xB0, 0xDF, + 0x20, 0x00, 0xB9, 0xD7, 0x18, 0x00, 0xB8, 0xD7, 0x40, 0x00, 0xBD, 0x67, + 0x00, 0x00, 0x1F, 0xD8, }; static constexpr uint8_t expected_cfi_kMips64_adjust[] = { - 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, - 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x04, 0x10, 0x00, - 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0, - 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, + 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, + 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x04, 0x10, 0x00, 0x02, 0x00, 0x0A, + 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, + 0x00, 0x44, 0x0B, 0x0E, 0x40, }; -// 0x00000000: daddiu r29, r29, -40 -// 0x00000004: .cfi_def_cfa_offset: 40 -// 0x00000004: sd r31, +32(r29) +// 0x00000000: daddiu r29, r29, -64 +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: sd r31, +56(r29) // 0x00000008: .cfi_offset: r31 at cfa-8 -// 0x00000008: sd r17, +24(r29) +// 0x00000008: sd r17, +48(r29) // 0x0000000c: .cfi_offset: r17 at cfa-16 -// 0x0000000c: sd r16, +16(r29) +// 0x0000000c: sd r16, +40(r29) // 0x00000010: .cfi_offset: r16 at cfa-24 -// 0x00000010: sdc1 f25, +8(r29) +// 0x00000010: sdc1 f25, +32(r29) // 0x00000014: .cfi_offset: r57 at cfa-32 -// 0x00000014: sdc1 f24, +0(r29) +// 0x00000014: sdc1 f24, +24(r29) // 0x00000018: .cfi_offset: r56 at cfa-40 -// 0x00000018: daddiu r29, r29, -24 -// 0x0000001c: .cfi_def_cfa_offset: 64 -// 0x0000001c: bnec r5, r6, 0x0000002c ; +12 -// 0x00000020: auipc r1, 2 -// 0x00000024: jic r1, 12 ; b 0x00020030 ; +131080 -// 0x00000028: nop +// 0x00000018: bnec r5, r6, 0x00000024 ; +12 +// 0x0000001c: auipc r1, 2 +// 0x00000020: jic r1, 12 ; bc 0x00020028 ; +131080 +// 0x00000024: nop // ... 
-// 0x00020028: nop -// 0x0002002c: .cfi_remember_state -// 0x0002002c: daddiu r29, r29, 24 -// 0x00020030: .cfi_def_cfa_offset: 40 -// 0x00020030: ldc1 f24, +0(r29) -// 0x00020034: .cfi_restore: r56 -// 0x00020034: ldc1 f25, +8(r29) +// 0x00020024: nop +// 0x00020028: .cfi_remember_state +// 0x00020028: ld r31, +56(r29) +// 0x0002002c: .cfi_restore: r31 +// 0x0002002c: ld r17, +48(r29) +// 0x00020030: .cfi_restore: r17 +// 0x00020030: ld r16, +40(r29) +// 0x00020034: .cfi_restore: r16 +// 0x00020034: ldc1 f25, +32(r29) // 0x00020038: .cfi_restore: r57 -// 0x00020038: ld r16, +16(r29) -// 0x0002003c: .cfi_restore: r16 -// 0x0002003c: ld r17, +24(r29) -// 0x00020040: .cfi_restore: r17 -// 0x00020040: ld r31, +32(r29) -// 0x00020044: .cfi_restore: r31 -// 0x00020044: daddiu r29, r29, 40 -// 0x00020047: .cfi_def_cfa_offset: 0 -// 0x00020048: jr r31 -// 0x0002004c: nop -// 0x00020050: .cfi_restore_state -// 0x00020050: .cfi_def_cfa_offset: 64 +// 0x00020038: ldc1 f24, +24(r29) +// 0x0002003c: .cfi_restore: r56 +// 0x0002003c: daddiu r29, r29, 64 +// 0x00020040: .cfi_def_cfa_offset: 0 +// 0x00020040: jic r31, 0 +// 0x00020044: .cfi_restore_state +// 0x00020044: .cfi_def_cfa_offset: 64 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 23ccd9e953..3c6d2d64a9 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -499,7 +499,8 @@ static HOptimization* BuildOptimization( handles, stats, number_of_dex_registers, - /* depth */ 0); + /* total_number_of_instructions */ 0, + /* parent */ nullptr); } else if (opt_name == HSharpening::kSharpeningPassName) { return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver, handles); } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) { @@ -607,8 +608,7 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph, VariableSizedHandleScope* handles) const { OptimizingCompilerStats* stats = compilation_stats_.get(); const CompilerOptions& compiler_options = driver->GetCompilerOptions(); - bool should_inline = (compiler_options.GetInlineDepthLimit() > 0) - && (compiler_options.GetInlineMaxCodeUnits() > 0); + bool should_inline = (compiler_options.GetInlineMaxCodeUnits() > 0); if (!should_inline) { return; } @@ -623,7 +623,8 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph, handles, stats, number_of_dex_registers, - /* depth */ 0); + /* total_number_of_instructions */ 0, + /* parent */ nullptr); HOptimization* optimizations[] = { inliner }; RunOptimizations(optimizations, arraysize(optimizations), pass_observer); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index ae9a8119a7..a211c5472a 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -69,6 +69,23 @@ enum MethodCompilationStat { kExplicitNullCheckGenerated, kSimplifyIf, kInstructionSunk, + kNotInlinedUnresolvedEntrypoint, + kNotInlinedDexCache, + kNotInlinedStackMaps, + kNotInlinedEnvironmentBudget, + kNotInlinedInstructionBudget, + kNotInlinedLoopWithoutExit, + kNotInlinedIrreducibleLoop, + kNotInlinedAlwaysThrows, + kNotInlinedInfiniteLoop, + kNotInlinedTryCatch, + kNotInlinedRegisterAllocator, + kNotInlinedCannotBuild, + kNotInlinedNotVerified, + kNotInlinedCodeItem, + kNotInlinedWont, + kNotInlinedRecursiveBudget, + kNotInlinedProxy, kLastStat }; @@ -168,6 +185,23 @@ class OptimizingCompilerStats { case kExplicitNullCheckGenerated: name 
= "ExplicitNullCheckGenerated"; break; case kSimplifyIf: name = "SimplifyIf"; break; case kInstructionSunk: name = "InstructionSunk"; break; + case kNotInlinedUnresolvedEntrypoint: name = "NotInlinedUnresolvedEntrypoint"; break; + case kNotInlinedDexCache: name = "NotInlinedDexCache"; break; + case kNotInlinedStackMaps: name = "NotInlinedStackMaps"; break; + case kNotInlinedEnvironmentBudget: name = "NotInlinedEnvironmentBudget"; break; + case kNotInlinedInstructionBudget: name = "NotInlinedInstructionBudget"; break; + case kNotInlinedLoopWithoutExit: name = "NotInlinedLoopWithoutExit"; break; + case kNotInlinedIrreducibleLoop: name = "NotInlinedIrreducibleLoop"; break; + case kNotInlinedAlwaysThrows: name = "NotInlinedAlwaysThrows"; break; + case kNotInlinedInfiniteLoop: name = "NotInlinedInfiniteLoop"; break; + case kNotInlinedTryCatch: name = "NotInlinedTryCatch"; break; + case kNotInlinedRegisterAllocator: name = "NotInlinedRegisterAllocator"; break; + case kNotInlinedCannotBuild: name = "NotInlinedCannotBuild"; break; + case kNotInlinedNotVerified: name = "NotInlinedNotVerified"; break; + case kNotInlinedCodeItem: name = "NotInlinedCodeItem"; break; + case kNotInlinedWont: name = "NotInlinedWont"; break; + case kNotInlinedRecursiveBudget: name = "NotInlinedRecursiveBudget"; break; + case kNotInlinedProxy: name = "NotInlinedProxy"; break; case kLastStat: LOG(FATAL) << "invalid stat " diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index efbaf6c221..66bfea9860 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -40,6 +40,14 @@ void PrepareForRegisterAllocation::VisitDivZeroCheck(HDivZeroCheck* check) { check->ReplaceWith(check->InputAt(0)); } +void PrepareForRegisterAllocation::VisitDeoptimize(HDeoptimize* deoptimize) { + if (deoptimize->GuardsAnInput()) { + // Replace the uses with the actual guarded instruction. 
+ deoptimize->ReplaceWith(deoptimize->GuardedInput()); + deoptimize->RemoveGuard(); + } +} + void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) { check->ReplaceWith(check->InputAt(0)); if (check->IsStringCharAt()) { diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index c128227654..7ffbe44ef6 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -44,6 +44,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; + void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const; bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 6e332ca59b..d5637b9b75 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -310,8 +310,8 @@ static void BoundTypeForClassCheck(HInstruction* check) { BoundTypeIn(receiver, trueBlock, /* start_instruction */ nullptr, class_rti); } else { DCHECK(check->IsDeoptimize()); - if (compare->IsEqual()) { - BoundTypeIn(receiver, check->GetBlock(), check, class_rti); + if (compare->IsEqual() && check->AsDeoptimize()->GuardsAnInput()) { + check->SetReferenceTypeInfo(class_rti); } } } diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc index 84a4bab1a9..0b49ce1a4c 100644 --- a/compiler/optimizing/reference_type_propagation_test.cc +++ b/compiler/optimizing/reference_type_propagation_test.cc @@ -29,7 +29,7 @@ namespace art { */ class ReferenceTypePropagationTest : public CommonCompilerTest { public: - ReferenceTypePropagationTest() : pool_(), allocator_(&pool_) { + ReferenceTypePropagationTest() : pool_(), allocator_(&pool_), propagation_(nullptr) { graph_ = CreateGraph(&allocator_); } diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index ab0dad4300..9236a0e4fa 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -315,7 +315,10 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor { // This class and its sub-classes will never be used to drive a visit of an // `HGraph` but only to visit `HInstructions` one at a time, so we do not need // to pass a valid graph to `HGraphDelegateVisitor()`. - SchedulingLatencyVisitor() : HGraphDelegateVisitor(nullptr) {} + SchedulingLatencyVisitor() + : HGraphDelegateVisitor(nullptr), + last_visited_latency_(0), + last_visited_internal_latency_(0) {} void VisitInstruction(HInstruction* instruction) OVERRIDE { LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". 
" @@ -413,6 +416,7 @@ class HScheduler { selector_(selector), only_optimize_loop_blocks_(true), scheduling_graph_(this, arena), + cursor_(nullptr), candidates_(arena_->Adapter(kArenaAllocScheduler)) {} virtual ~HScheduler() {} diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc index 1916c73ca4..a1016d1d47 100644 --- a/compiler/optimizing/ssa_liveness_analysis_test.cc +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -189,13 +189,14 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { // Use HAboveOrEqual+HDeoptimize as the bounds check. HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length); block->AddInstruction(ae); - HInstruction* deoptimize = new(&allocator_) HDeoptimize(ae, /* dex_pc */ 0u); + HInstruction* deoptimize = + new(&allocator_) HDeoptimize(&allocator_, ae, HDeoptimize::Kind::kBCE, /* dex_pc */ 0u); block->AddInstruction(deoptimize); HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_, - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, - deoptimize); + /* number_of_vregs */ 5, + /* method */ nullptr, + /* dex_pc */ 0u, + deoptimize); deoptimize_env->CopyFrom(args); deoptimize->SetRawEnvironment(deoptimize_env); HInstruction* array_set = diff --git a/compiler/utils/atomic_method_ref_map-inl.h b/compiler/utils/atomic_method_ref_map-inl.h index d71c2fe997..ad3a099eb6 100644 --- a/compiler/utils/atomic_method_ref_map-inl.h +++ b/compiler/utils/atomic_method_ref_map-inl.h @@ -42,7 +42,7 @@ template <typename T> inline bool AtomicMethodRefMap<T>::Get(MethodReference ref, T* out) const { const ElementArray* const array = GetArray(ref.dex_file); if (array == nullptr) { - return kInsertResultInvalidDexFile; + return false; } *out = (*array)[ref.dex_method_index].LoadRelaxed(); return true; diff --git a/compiler/utils/mips64/managed_register_mips64.cc b/compiler/utils/mips64/managed_register_mips64.cc index dea396e4a7..42d061ec15 100644 --- a/compiler/utils/mips64/managed_register_mips64.cc +++ b/compiler/utils/mips64/managed_register_mips64.cc @@ -26,6 +26,11 @@ bool Mips64ManagedRegister::Overlaps(const Mips64ManagedRegister& other) const { CHECK(IsValidManagedRegister()); CHECK(other.IsValidManagedRegister()); if (Equals(other)) return true; + if (IsFpuRegister() && other.IsVectorRegister()) { + return (AsFpuRegister() == other.AsOverlappingFpuRegister()); + } else if (IsVectorRegister() && other.IsFpuRegister()) { + return (AsVectorRegister() == other.AsOverlappingVectorRegister()); + } return false; } @@ -36,6 +41,8 @@ void Mips64ManagedRegister::Print(std::ostream& os) const { os << "GPU: " << static_cast<int>(AsGpuRegister()); } else if (IsFpuRegister()) { os << "FpuRegister: " << static_cast<int>(AsFpuRegister()); + } else if (IsVectorRegister()) { + os << "VectorRegister: " << static_cast<int>(AsVectorRegister()); } else { os << "??: " << RegId(); } diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h index c9f95569cf..3980199b1e 100644 --- a/compiler/utils/mips64/managed_register_mips64.h +++ b/compiler/utils/mips64/managed_register_mips64.h @@ -30,11 +30,27 @@ const int kNumberOfGpuAllocIds = kNumberOfGpuRegisters; const int kNumberOfFpuRegIds = kNumberOfFpuRegisters; const int kNumberOfFpuAllocIds = kNumberOfFpuRegisters; -const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds; -const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds; - -// An instance of class 
'ManagedRegister' represents a single GPU register (enum -// Register) or a double precision FP register (enum FpuRegister) +const int kNumberOfVecRegIds = kNumberOfVectorRegisters; +const int kNumberOfVecAllocIds = kNumberOfVectorRegisters; + +const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds + kNumberOfVecRegIds; +const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds + kNumberOfVecAllocIds; + +// Register ids map: +// [0..R[ core registers (enum GpuRegister) +// [R..F[ floating-point registers (enum FpuRegister) +// [F..W[ MSA vector registers (enum VectorRegister) +// where +// R = kNumberOfGpuRegIds +// F = R + kNumberOfFpuRegIds +// W = F + kNumberOfVecRegIds + +// An instance of class 'ManagedRegister' represents a single Mips64 register. +// A register can be one of the following: +// * core register (enum GpuRegister) +// * floating-point register (enum FpuRegister) +// * MSA vector register (enum VectorRegister) +// // 'ManagedRegister::NoRegister()' provides an invalid register. // There is a one-to-one mapping between ManagedRegister and register id. class Mips64ManagedRegister : public ManagedRegister { @@ -49,6 +65,21 @@ class Mips64ManagedRegister : public ManagedRegister { return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds); } + constexpr VectorRegister AsVectorRegister() const { + CHECK(IsVectorRegister()); + return static_cast<VectorRegister>(id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegisters)); + } + + constexpr FpuRegister AsOverlappingFpuRegister() const { + CHECK(IsValidManagedRegister()); + return static_cast<FpuRegister>(AsVectorRegister()); + } + + constexpr VectorRegister AsOverlappingVectorRegister() const { + CHECK(IsValidManagedRegister()); + return static_cast<VectorRegister>(AsFpuRegister()); + } + constexpr bool IsGpuRegister() const { CHECK(IsValidManagedRegister()); return (0 <= id_) && (id_ < kNumberOfGpuRegIds); @@ -60,6 +91,12 @@ class Mips64ManagedRegister : public ManagedRegister { return (0 <= test) && (test < kNumberOfFpuRegIds); } + constexpr bool IsVectorRegister() const { + CHECK(IsValidManagedRegister()); + const int test = id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegIds); + return (0 <= test) && (test < kNumberOfVecRegIds); + } + void Print(std::ostream& os) const; // Returns true if the two managed-registers ('this' and 'other') overlap. @@ -77,6 +114,11 @@ class Mips64ManagedRegister : public ManagedRegister { return FromRegId(r + kNumberOfGpuRegIds); } + static constexpr Mips64ManagedRegister FromVectorRegister(VectorRegister r) { + CHECK_NE(r, kNoVectorRegister); + return FromRegId(r + kNumberOfGpuRegIds + kNumberOfFpuRegIds); + } + private: constexpr bool IsValidManagedRegister() const { return (0 <= id_) && (id_ < kNumberOfRegIds); diff --git a/compiler/utils/mips64/managed_register_mips64_test.cc b/compiler/utils/mips64/managed_register_mips64_test.cc new file mode 100644 index 0000000000..8b72d7e61d --- /dev/null +++ b/compiler/utils/mips64/managed_register_mips64_test.cc @@ -0,0 +1,480 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "managed_register_mips64.h" +#include "globals.h" +#include "gtest/gtest.h" + +namespace art { +namespace mips64 { + +TEST(Mips64ManagedRegister, NoRegister) { + Mips64ManagedRegister reg = ManagedRegister::NoRegister().AsMips64(); + EXPECT_TRUE(reg.IsNoRegister()); + EXPECT_FALSE(reg.Overlaps(reg)); +} + +TEST(Mips64ManagedRegister, GpuRegister) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromGpuRegister(ZERO); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(ZERO, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(AT); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(AT, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(V0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(V0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(A0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(A0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(A7); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(A7, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(T0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(T0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(T3); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(T3, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(S0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(S0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(GP); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(GP, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(SP); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(SP, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(RA); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(RA, reg.AsGpuRegister()); +} + +TEST(Mips64ManagedRegister, FpuRegister) { + Mips64ManagedRegister reg = 
Mips64ManagedRegister::FromFpuRegister(F0); + Mips64ManagedRegister vreg = Mips64ManagedRegister::FromVectorRegister(W0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F0, reg.AsFpuRegister()); + EXPECT_EQ(W0, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + + reg = Mips64ManagedRegister::FromFpuRegister(F1); + vreg = Mips64ManagedRegister::FromVectorRegister(W1); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F1, reg.AsFpuRegister()); + EXPECT_EQ(W1, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F1))); + + reg = Mips64ManagedRegister::FromFpuRegister(F20); + vreg = Mips64ManagedRegister::FromVectorRegister(W20); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F20, reg.AsFpuRegister()); + EXPECT_EQ(W20, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F20))); + + reg = Mips64ManagedRegister::FromFpuRegister(F31); + vreg = Mips64ManagedRegister::FromVectorRegister(W31); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F31, reg.AsFpuRegister()); + EXPECT_EQ(W31, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F31))); +} + +TEST(Mips64ManagedRegister, VectorRegister) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromVectorRegister(W0); + Mips64ManagedRegister freg = Mips64ManagedRegister::FromFpuRegister(F0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W0, reg.AsVectorRegister()); + EXPECT_EQ(F0, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + reg = Mips64ManagedRegister::FromVectorRegister(W2); + freg = Mips64ManagedRegister::FromFpuRegister(F2); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W2, reg.AsVectorRegister()); + EXPECT_EQ(F2, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W2))); + + reg = Mips64ManagedRegister::FromVectorRegister(W13); + freg = Mips64ManagedRegister::FromFpuRegister(F13); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W13, reg.AsVectorRegister()); + EXPECT_EQ(F13, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W13))); + + reg = Mips64ManagedRegister::FromVectorRegister(W29); + freg = Mips64ManagedRegister::FromFpuRegister(F29); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + 
EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W29, reg.AsVectorRegister()); + EXPECT_EQ(F29, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W29))); +} + +TEST(Mips64ManagedRegister, Equals) { + ManagedRegister no_reg = ManagedRegister::NoRegister(); + EXPECT_TRUE(no_reg.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_ZERO = Mips64ManagedRegister::FromGpuRegister(ZERO); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_TRUE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_A1 = Mips64ManagedRegister::FromGpuRegister(A1); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_TRUE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_S2 = Mips64ManagedRegister::FromGpuRegister(S2); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_TRUE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_F0 = Mips64ManagedRegister::FromFpuRegister(F0); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_TRUE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F1))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_F31 = Mips64ManagedRegister::FromFpuRegister(F31); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + 
EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F1))); + EXPECT_TRUE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_W0 = Mips64ManagedRegister::FromVectorRegister(W0); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W31))); + + Mips64ManagedRegister reg_W31 = Mips64ManagedRegister::FromVectorRegister(W31); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W1))); + EXPECT_TRUE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W31))); +} + +TEST(Mips64ManagedRegister, Overlaps) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0); + Mips64ManagedRegister reg_o = Mips64ManagedRegister::FromVectorRegister(W0); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F0, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W0, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F4); + reg_o = Mips64ManagedRegister::FromVectorRegister(W4); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F4, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W4, reg.AsOverlappingVectorRegister()); + 
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F16); + reg_o = Mips64ManagedRegister::FromVectorRegister(W16); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F16, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W16, reg.AsOverlappingVectorRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F31); + reg_o = Mips64ManagedRegister::FromVectorRegister(W31); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F31, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W31, reg.AsOverlappingVectorRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W0); + reg_o = Mips64ManagedRegister::FromFpuRegister(F0); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W0, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F0, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + 
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W4); + reg_o = Mips64ManagedRegister::FromFpuRegister(F4); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W4, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F4, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W16); + reg_o = Mips64ManagedRegister::FromFpuRegister(F16); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W16, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F16, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W31); + reg_o = Mips64ManagedRegister::FromFpuRegister(F31); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W31, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F31, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + 
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(ZERO); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(A0); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(S0); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(RA); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + 
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); +} + +} // namespace mips64 +} // namespace art diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc index 4bfc84990d..fa7e98586c 100644 --- a/compiler/verifier_deps_test.cc +++ b/compiler/verifier_deps_test.cc @@ -18,21 +18,21 @@ #include "verifier/verifier_deps.h" #include "class_linker.h" -#include "compiler/common_compiler_test.h" -#include "compiler/dex/verification_results.h" -#include "compiler/dex/verified_method.h" -#include "compiler/driver/compiler_options.h" -#include "compiler/driver/compiler_driver.h" -#include "compiler/utils/atomic_method_ref_map-inl.h" +#include "common_compiler_test.h" #include "compiler_callbacks.h" +#include "dex/verification_results.h" +#include "dex/verified_method.h" #include "dex_file.h" #include "dex_file_types.h" +#include "driver/compiler_options.h" +#include "driver/compiler_driver.h" #include "handle_scope-inl.h" #include "verifier/method_verifier-inl.h" #include "mirror/class_loader.h" #include "runtime.h" #include "thread.h" #include "scoped_thread_state_change-inl.h" +#include "utils/atomic_method_ref_map-inl.h" namespace art { namespace verifier { |
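Taken together, the managed_register_mips64 changes and the new test encode two rules: register ids are laid out as core ids, then FPU ids, then MSA vector ids, and a floating-point register Fn overlaps exactly the vector register Wn of the same index (besides itself), since an MSA vector register extends the corresponding FPU register. A small standalone sketch of that id arithmetic and overlap rule, using plain ints and an assumed count of 32 registers per class rather than ART's enums (C++14 for the constexpr bodies):

    #include <cassert>

    // Illustrative register-id layout: [0..R) core, [R..F) FPU, [F..W) MSA vectors,
    // where R = kNumGpu, F = R + kNumFpu, W = F + kNumVec (counts assumed to be 32).
    constexpr int kNumGpu = 32;
    constexpr int kNumFpu = 32;
    constexpr int kNumVec = 32;

    constexpr int FromGpu(int r) { return r; }
    constexpr int FromFpu(int f) { return kNumGpu + f; }
    constexpr int FromVec(int w) { return kNumGpu + kNumFpu + w; }

    constexpr bool IsFpu(int id) { return id >= kNumGpu && id < kNumGpu + kNumFpu; }
    constexpr bool IsVec(int id) {
      return id >= kNumGpu + kNumFpu && id < kNumGpu + kNumFpu + kNumVec;
    }

    // Mirrors the Overlaps() rule above: equal ids overlap, and Fn overlaps Wn
    // because the vector register shares its low bits with the FPU register
    // of the same index.
    constexpr bool Overlaps(int a, int b) {
      if (a == b) return true;
      if (IsFpu(a) && IsVec(b)) return (a - kNumGpu) == (b - kNumGpu - kNumFpu);
      if (IsVec(a) && IsFpu(b)) return Overlaps(b, a);
      return false;
    }

    int main() {
      static_assert(Overlaps(FromFpu(4), FromVec(4)), "F4 aliases W4");
      static_assert(!Overlaps(FromFpu(4), FromVec(5)), "F4 does not alias W5");
      static_assert(!Overlaps(FromGpu(4), FromVec(4)), "core regs never alias MSA regs");
      assert(Overlaps(FromVec(31), FromFpu(31)));
      return 0;
    }

The sketch only captures the id layout that the FromVectorRegister() and Overlaps() additions rely on; the real class additionally models invalid registers and routes construction through FromRegId().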