155 files changed, 7630 insertions, 728 deletions
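For readers skimming the OpRegCopyWide changes below (compiler/dex/quick/arm/int_arm.cc and compiler/dex/quick/mips/int_mips.cc), here is a minimal, self-contained sketch of the overlap-safe register-pair copy those hunks implement. The Pair, CopyWide, and regs names are hypothetical stand-ins for illustration only, not ART's RegStorage/OpRegCopy API; the three-way case split mirrors the patched code, including the scratch-register path that replaces the old DCHECK on the full-swap case.

#include <cassert>
#include <iostream>

// A 64-bit value held in two core registers (low and high halves).
struct Pair { int lo; int hi; };

// Copies the pair `src` into `dest` even when the two pairs overlap,
// using `scratch` only for the full-swap case.
void CopyWide(int regs[], Pair dest, Pair src, int scratch) {
  if (src.hi != dest.lo) {
    // Writing dest.lo first cannot clobber src.hi, so low-then-high is safe.
    regs[dest.lo] = regs[src.lo];
    regs[dest.hi] = regs[src.hi];
  } else if (src.lo != dest.hi) {
    // Writing dest.hi first cannot clobber src.lo, so high-then-low is safe.
    regs[dest.hi] = regs[src.hi];
    regs[dest.lo] = regs[src.lo];
  } else {
    // Full overlap (a swap): stash src.hi in a scratch register first,
    // mirroring the AllocTemp()/FreeTemp() path added by the patch.
    regs[scratch] = regs[src.hi];
    regs[dest.lo] = regs[src.lo];
    regs[dest.hi] = regs[scratch];
  }
}

int main() {
  int regs[8] = {0, 11, 22, 0, 0, 0, 0, 0};
  CopyWide(regs, /*dest=*/{2, 1}, /*src=*/{1, 2}, /*scratch=*/7);  // fully overlapping swap
  assert(regs[2] == 11 && regs[1] == 22);
  std::cout << "lo=" << regs[2] << " hi=" << regs[1] << "\n";
  return 0;
}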
diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk index 2d6b6a3cbf..183f4e3ec3 100644 --- a/build/Android.common_path.mk +++ b/build/Android.common_path.mk @@ -41,7 +41,7 @@ else ART_HOST_TEST_DIR := /tmp/test-art-$(shell echo $$PPID) endif -# Core.oat location on the device. +# core.oat location on the device. TARGET_CORE_OAT := $(ART_TARGET_TEST_DIR)/$(DEX2OAT_TARGET_ARCH)/core.oat ifdef TARGET_2ND_ARCH 2ND_TARGET_CORE_OAT := $(ART_TARGET_TEST_DIR)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)/core.oat @@ -49,7 +49,7 @@ endif CORE_OAT_SUFFIX := .oat -# Core.oat locations under the out directory. +# core.oat locations under the out directory. HOST_CORE_OAT_OUT_BASE := $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/core ifneq ($(HOST_PREFER_32_BIT),true) 2ND_HOST_CORE_OAT_OUT_BASE := $(HOST_OUT_JAVA_LIBRARIES)/$(2ND_ART_HOST_ARCH)/core @@ -63,7 +63,7 @@ TARGET_CORE_OAT_OUTS := CORE_IMG_SUFFIX := .art -# Core.art locations under the out directory. +# core.art locations under the out directory. HOST_CORE_IMG_OUT_BASE := $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/core ifneq ($(HOST_PREFER_32_BIT),true) 2ND_HOST_CORE_IMG_OUT_BASE := $(HOST_OUT_JAVA_LIBRARIES)/$(2ND_ART_HOST_ARCH)/core diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc index 6192be75a4..98fd327f02 100644 --- a/cmdline/cmdline_parser_test.cc +++ b/cmdline/cmdline_parser_test.cc @@ -101,6 +101,19 @@ namespace art { return ::testing::AssertionFailure() << "key was not in the map"; } + template <typename TMap, typename TKey, typename T> + ::testing::AssertionResult IsExpectedDefaultKeyValue(const T& expected, + const TMap& map, + const TKey& key) { + const T& actual = map.GetOrDefault(key); + if (!UsuallyEquals(expected, actual)) { + return ::testing::AssertionFailure() + << "expected " << detail::ToStringAny(expected) << " but got " + << detail::ToStringAny(actual); + } + return ::testing::AssertionSuccess(); + } + class CmdlineParserTest : public ::testing::Test { public: CmdlineParserTest() = default; @@ -145,13 +158,23 @@ class CmdlineParserTest : public ::testing::Test { #define EXPECT_KEY_EXISTS(map, key) EXPECT_TRUE((map).Exists(key)) #define EXPECT_KEY_VALUE(map, key, expected) EXPECT_TRUE(IsExpectedKeyValue(expected, map, key)) +#define EXPECT_DEFAULT_KEY_VALUE(map, key, expected) EXPECT_TRUE(IsExpectedDefaultKeyValue(expected, map, key)) -#define EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv) \ +#define _EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv) \ do { \ EXPECT_TRUE(IsResultSuccessful(parser_->Parse(argv))); \ EXPECT_EQ(0u, parser_->GetArgumentsMap().Size()); \ + +#define EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv) \ + _EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv); \ } while (false) +#define EXPECT_SINGLE_PARSE_DEFAULT_VALUE(expected, argv, key)\ + _EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv); \ + RuntimeArgumentMap args = parser_->ReleaseArgumentsMap(); \ + EXPECT_DEFAULT_KEY_VALUE(args, key, expected); \ + } while (false) // NOLINT [readability/namespace] [5] + #define _EXPECT_SINGLE_PARSE_EXISTS(argv, key) \ do { \ EXPECT_TRUE(IsResultSuccessful(parser_->Parse(argv))); \ @@ -509,6 +532,24 @@ TEST_F(CmdlineParserTest, TestProfilerOptions) { } } // TEST_F +/* -X[no]experimental-lambdas */ +TEST_F(CmdlineParserTest, TestExperimentalLambdas) { + // Off by default + EXPECT_SINGLE_PARSE_DEFAULT_VALUE(false, + "", + M::ExperimentalLambdas); + + // Disabled explicitly + EXPECT_SINGLE_PARSE_VALUE(false, + "-Xnoexperimental-lambdas", + M::ExperimentalLambdas); + + // Enabled explicitly + 
EXPECT_SINGLE_PARSE_VALUE(true, + "-Xexperimental-lambdas", + M::ExperimentalLambdas); +} + TEST_F(CmdlineParserTest, TestIgnoreUnrecognized) { RuntimeParser::Builder parserBuilder; diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h index 28bd754108..a57b6196de 100644 --- a/cmdline/cmdline_types.h +++ b/cmdline/cmdline_types.h @@ -472,6 +472,7 @@ struct XGcOption { bool verify_pre_gc_rosalloc_ = kIsDebugBuild; bool verify_pre_sweeping_rosalloc_ = false; bool verify_post_gc_rosalloc_ = false; + bool gcstress_ = false; }; template <> @@ -509,6 +510,10 @@ struct CmdlineType<XGcOption> : CmdlineTypeParser<XGcOption> { xgc.verify_post_gc_rosalloc_ = true; } else if (gc_option == "nopostverify_rosalloc") { xgc.verify_post_gc_rosalloc_ = false; + } else if (gc_option == "gcstress") { + xgc.gcstress_ = true; + } else if (gc_option == "nogcstress") { + xgc.gcstress_ = false; } else if ((gc_option == "precise") || (gc_option == "noprecise") || (gc_option == "verifycardtable") || diff --git a/compiler/Android.mk b/compiler/Android.mk index 67536f00b9..dd214067a6 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -103,6 +103,7 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/code_generator.cc \ optimizing/code_generator_arm.cc \ optimizing/code_generator_arm64.cc \ + optimizing/code_generator_mips64.cc \ optimizing/code_generator_x86.cc \ optimizing/code_generator_x86_64.cc \ optimizing/code_generator_utils.cc \ @@ -243,7 +244,7 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT endif endif - LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime + LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime art/disassembler ifeq ($$(art_target_or_host),host) # For compiler driver TLS. diff --git a/compiler/buffered_output_stream.h b/compiler/buffered_output_stream.h index bbc49df6d2..15fc0335a9 100644 --- a/compiler/buffered_output_stream.h +++ b/compiler/buffered_output_stream.h @@ -28,6 +28,7 @@ class BufferedOutputStream FINAL : public OutputStream { explicit BufferedOutputStream(OutputStream* out); virtual ~BufferedOutputStream() { + Flush(); delete out_; } diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc index e2b99871c8..94ba4fad2a 100644 --- a/compiler/dex/global_value_numbering.cc +++ b/compiler/dex/global_value_numbering.cc @@ -160,20 +160,10 @@ uint16_t GlobalValueNumbering::GetArrayLocation(uint16_t base, uint16_t index) { return location; } -bool GlobalValueNumbering::HasNullCheckLastInsn(const BasicBlock* pred_bb, - BasicBlockId succ_id) { - if (pred_bb->block_type != kDalvikByteCode || pred_bb->last_mir_insn == nullptr) { - return false; - } - Instruction::Code last_opcode = pred_bb->last_mir_insn->dalvikInsn.opcode; - return ((last_opcode == Instruction::IF_EQZ && pred_bb->fall_through == succ_id) || - (last_opcode == Instruction::IF_NEZ && pred_bb->taken == succ_id)); -} - bool GlobalValueNumbering::NullCheckedInAllPredecessors( const ScopedArenaVector<uint16_t>& merge_names) const { // Implicit parameters: - // - *work_lvn: the LVN for which we're checking predecessors. + // - *work_lvn_: the LVN for which we're checking predecessors. // - merge_lvns_: the predecessor LVNs. 
DCHECK_EQ(merge_lvns_.size(), merge_names.size()); for (size_t i = 0, size = merge_lvns_.size(); i != size; ++i) { @@ -198,7 +188,7 @@ bool GlobalValueNumbering::NullCheckedInAllPredecessors( bool GlobalValueNumbering::DivZeroCheckedInAllPredecessors( const ScopedArenaVector<uint16_t>& merge_names) const { // Implicit parameters: - // - *work_lvn: the LVN for which we're checking predecessors. + // - *work_lvn_: the LVN for which we're checking predecessors. // - merge_lvns_: the predecessor LVNs. DCHECK_EQ(merge_lvns_.size(), merge_names.size()); for (size_t i = 0, size = merge_lvns_.size(); i != size; ++i) { @@ -217,15 +207,11 @@ bool GlobalValueNumbering::IsBlockEnteredOnTrue(uint16_t cond, BasicBlockId bb_i if (bb->predecessors.size() == 1u) { BasicBlockId pred_id = bb->predecessors[0]; BasicBlock* pred_bb = mir_graph_->GetBasicBlock(pred_id); - if (pred_bb->last_mir_insn != nullptr) { - Instruction::Code opcode = pred_bb->last_mir_insn->dalvikInsn.opcode; - if ((opcode == Instruction::IF_NEZ && pred_bb->taken == bb_id) || - (opcode == Instruction::IF_EQZ && pred_bb->fall_through == bb_id)) { - DCHECK(lvns_[pred_id] != nullptr); - uint16_t operand = lvns_[pred_id]->GetSregValue(pred_bb->last_mir_insn->ssa_rep->uses[0]); - if (operand == cond) { - return true; - } + if (pred_bb->BranchesToSuccessorOnlyIfNotZero(bb_id)) { + DCHECK(lvns_[pred_id] != nullptr); + uint16_t operand = lvns_[pred_id]->GetSregValue(pred_bb->last_mir_insn->ssa_rep->uses[0]); + if (operand == cond) { + return true; } } } diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h index bd2f187d17..c514f75dcc 100644 --- a/compiler/dex/global_value_numbering.h +++ b/compiler/dex/global_value_numbering.h @@ -194,7 +194,9 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { return mir_graph_->GetBasicBlock(bb_id); } - static bool HasNullCheckLastInsn(const BasicBlock* pred_bb, BasicBlockId succ_id); + static bool HasNullCheckLastInsn(const BasicBlock* pred_bb, BasicBlockId succ_id) { + return pred_bb->BranchesToSuccessorOnlyIfNotZero(succ_id); + } bool NullCheckedInAllPredecessors(const ScopedArenaVector<uint16_t>& merge_names) const; diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 9fa5148ced..38342420ac 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -173,7 +173,7 @@ int MIRGraph::ParseInsn(const uint16_t* code_ptr, MIR::DecodedInstruction* decod decoded_instruction->vB = inst->HasVRegB() ? inst->VRegB() : 0; decoded_instruction->vB_wide = inst->HasWideVRegB() ? inst->WideVRegB() : 0; decoded_instruction->vC = inst->HasVRegC() ? inst->VRegC() : 0; - if (inst->HasVarArgs()) { + if (inst->HasVarArgs35c()) { inst->GetVarArgs(decoded_instruction->arg); } return inst->SizeInCodeUnits(); @@ -398,12 +398,13 @@ bool MIRGraph::IsBadMonitorExitCatch(NarrowDexOffset monitor_exit_offset, DCHECK(monitor_exit->Opcode() == Instruction::MONITOR_EXIT); int monitor_reg = monitor_exit->VRegA_11x(); const Instruction* check_insn = Instruction::At(current_code_item_->insns_ + catch_offset); - DCHECK(check_insn->Opcode() == Instruction::MOVE_EXCEPTION); - if (check_insn->VRegA_11x() == monitor_reg) { - // Unexpected move-exception to the same register. Probably not the pattern we're looking for. - return false; + if (check_insn->Opcode() == Instruction::MOVE_EXCEPTION) { + if (check_insn->VRegA_11x() == monitor_reg) { + // Unexpected move-exception to the same register. Probably not the pattern we're looking for. 
+ return false; + } + check_insn = check_insn->Next(); } - check_insn = check_insn->Next(); while (true) { int dest = -1; bool wide = false; diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index f038397e1e..dbe906280f 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -452,6 +452,21 @@ class BasicBlock : public DeletableArenaObject<kArenaAllocBB> { MIR* GetFirstNonPhiInsn(); /** + * @brief Checks whether the block ends with if-nez or if-eqz that branches to + * the given successor only if the register in not zero. + */ + bool BranchesToSuccessorOnlyIfNotZero(BasicBlockId succ_id) const { + if (last_mir_insn == nullptr) { + return false; + } + Instruction::Code last_opcode = last_mir_insn->dalvikInsn.opcode; + return ((last_opcode == Instruction::IF_EQZ && fall_through == succ_id) || + (last_opcode == Instruction::IF_NEZ && taken == succ_id)) && + // Make sure the other successor isn't the same (empty if), b/21614284. + (fall_through != taken); + } + + /** * @brief Used to obtain the next MIR that follows unconditionally. * @details The implementation does not guarantee that a MIR does not * follow even if this method returns nullptr. diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 645511ed9f..5bb0ce3ba5 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -978,18 +978,12 @@ bool MIRGraph::EliminateNullChecks(BasicBlock* bb) { BasicBlock* pred_bb = GetBasicBlock(pred_id); DCHECK(pred_bb != nullptr); MIR* null_check_insn = nullptr; - if (pred_bb->block_type == kDalvikByteCode) { - // Check to see if predecessor had an explicit null-check. - MIR* last_insn = pred_bb->last_mir_insn; - if (last_insn != nullptr) { - Instruction::Code last_opcode = last_insn->dalvikInsn.opcode; - if ((last_opcode == Instruction::IF_EQZ && pred_bb->fall_through == bb->id) || - (last_opcode == Instruction::IF_NEZ && pred_bb->taken == bb->id)) { - // Remember the null check insn if there's no other predecessor requiring null check. - if (!copied_first || !vregs_to_check->IsBitSet(last_insn->dalvikInsn.vA)) { - null_check_insn = last_insn; - } - } + // Check to see if predecessor had an explicit null-check. + if (pred_bb->BranchesToSuccessorOnlyIfNotZero(bb->id)) { + // Remember the null check insn if there's no other predecessor requiring null check. 
+ if (!copied_first || !vregs_to_check->IsBitSet(pred_bb->last_mir_insn->dalvikInsn.vA)) { + null_check_insn = pred_bb->last_mir_insn; + DCHECK(null_check_insn != nullptr); } } if (!copied_first) { @@ -1673,15 +1667,9 @@ void MIRGraph::StringChange() { if (opcode == Instruction::NEW_INSTANCE) { uint32_t type_idx = mir->dalvikInsn.vB; if (cu_->compiler_driver->IsStringTypeIndex(type_idx, cu_->dex_file)) { - // Change NEW_INSTANCE and throwing half of the insn (if it exists) into CONST_4 of 0 + // Change NEW_INSTANCE into CONST_4 of 0 mir->dalvikInsn.opcode = Instruction::CONST_4; mir->dalvikInsn.vB = 0; - MIR* check_mir = GetBasicBlock(bb->predecessors[0])->last_mir_insn; - if (check_mir != nullptr && - static_cast<int>(check_mir->dalvikInsn.opcode) == kMirOpCheck) { - check_mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); - check_mir->dalvikInsn.vB = 0; - } } } else if ((opcode == Instruction::INVOKE_DIRECT) || (opcode == Instruction::INVOKE_DIRECT_RANGE)) { diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 6d30e72f86..cf0188456d 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -471,13 +471,18 @@ void ArmMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_src.GetReg()); } else { // Handle overlap - if (r_src.GetHighReg() == r_dest.GetLowReg()) { - DCHECK_NE(r_src.GetLowReg(), r_dest.GetHighReg()); + if (r_src.GetHighReg() != r_dest.GetLowReg()) { + OpRegCopy(r_dest.GetLow(), r_src.GetLow()); + OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + } else if (r_src.GetLowReg() != r_dest.GetHighReg()) { OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); OpRegCopy(r_dest.GetLow(), r_src.GetLow()); } else { + RegStorage r_tmp = AllocTemp(); + OpRegCopy(r_tmp, r_src.GetHigh()); OpRegCopy(r_dest.GetLow(), r_src.GetLow()); - OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + OpRegCopy(r_dest.GetHigh(), r_tmp); + FreeTemp(r_tmp); } } } diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index c803e6588c..8629f39702 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1370,7 +1370,9 @@ void Mir2Lir::InitReferenceVRegs(BasicBlock* bb, BitVector* references) { DCHECK(first_bb->data_flow_info->vreg_to_ssa_map_exit != nullptr); const int32_t* first_vreg_to_ssa_map = first_bb->data_flow_info->vreg_to_ssa_map_exit; references->ClearAllBits(); - for (uint32_t vreg = 0, num_vregs = mir_graph_->GetNumOfCodeVRs(); vreg != num_vregs; ++vreg) { + for (uint32_t vreg = 0, + num_vregs = mir_graph_->GetNumOfCodeVRs() + mir_graph_->GetNumUsedCompilerTemps(); + vreg != num_vregs; ++vreg) { int32_t sreg = first_vreg_to_ssa_map[vreg]; if (sreg != INVALID_SREG && mir_graph_->reg_location_[sreg].ref && !mir_graph_->IsConstantNullRef(mir_graph_->reg_location_[sreg])) { diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc index 9319c64784..f5ad7c7c33 100644 --- a/compiler/dex/quick/mips/int_mips.cc +++ b/compiler/dex/quick/mips/int_mips.cc @@ -258,13 +258,19 @@ void MipsMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { } } else { // Here if both src and dest are core registers. - // Handle overlap. 
- if (r_src.GetHighReg() == r_dest.GetLowReg()) { + // Handle overlap + if (r_src.GetHighReg() != r_dest.GetLowReg()) { + OpRegCopy(r_dest.GetLow(), r_src.GetLow()); + OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + } else if (r_src.GetLowReg() != r_dest.GetHighReg()) { OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); OpRegCopy(r_dest.GetLow(), r_src.GetLow()); } else { + RegStorage r_tmp = AllocTemp(); + OpRegCopy(r_tmp, r_src.GetHigh()); OpRegCopy(r_dest.GetLow(), r_src.GetLow()); - OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + OpRegCopy(r_dest.GetHigh(), r_tmp); + FreeTemp(r_tmp); } } } diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index 58236e2216..2523a83c39 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -377,13 +377,13 @@ static int kAllOpcodes[] = { Instruction::IGET_BYTE_QUICK, Instruction::IGET_CHAR_QUICK, Instruction::IGET_SHORT_QUICK, - Instruction::UNUSED_F3, + Instruction::INVOKE_LAMBDA, Instruction::UNUSED_F4, Instruction::UNUSED_F5, - Instruction::UNUSED_F6, + Instruction::CREATE_LAMBDA, Instruction::UNUSED_F7, - Instruction::UNUSED_F8, - Instruction::UNUSED_F9, + Instruction::BOX_LAMBDA, + Instruction::UNBOX_LAMBDA, Instruction::UNUSED_FA, Instruction::UNUSED_FB, Instruction::UNUSED_FC, @@ -421,7 +421,15 @@ static int kInvokeOpcodes[] = { Instruction::INVOKE_VIRTUAL_RANGE_QUICK, }; -// Unsupported opcodes. null can be used when everything is supported. Size of the lists is +// TODO: Add support for lambda opcodes to the quick compiler. +static const int kUnsupportedLambdaOpcodes[] = { + Instruction::INVOKE_LAMBDA, + Instruction::CREATE_LAMBDA, + Instruction::BOX_LAMBDA, + Instruction::UNBOX_LAMBDA, +}; + +// Unsupported opcodes. Null can be used when everything is supported. Size of the lists is // recorded below. static const int* kUnsupportedOpcodes[] = { // 0 = kNone. @@ -429,17 +437,17 @@ static const int* kUnsupportedOpcodes[] = { // 1 = kArm, unused (will use kThumb2). kAllOpcodes, // 2 = kArm64. - nullptr, + kUnsupportedLambdaOpcodes, // 3 = kThumb2. - nullptr, + kUnsupportedLambdaOpcodes, // 4 = kX86. - nullptr, + kUnsupportedLambdaOpcodes, // 5 = kX86_64. - nullptr, + kUnsupportedLambdaOpcodes, // 6 = kMips. - nullptr, + kUnsupportedLambdaOpcodes, // 7 = kMips64. - nullptr + kUnsupportedLambdaOpcodes, }; static_assert(sizeof(kUnsupportedOpcodes) == 8 * sizeof(int*), "kUnsupportedOpcodes unexpected"); @@ -450,21 +458,26 @@ static const size_t kUnsupportedOpcodesSize[] = { // 1 = kArm, unused (will use kThumb2). arraysize(kAllOpcodes), // 2 = kArm64. - 0, + arraysize(kUnsupportedLambdaOpcodes), // 3 = kThumb2. - 0, + arraysize(kUnsupportedLambdaOpcodes), // 4 = kX86. - 0, + arraysize(kUnsupportedLambdaOpcodes), // 5 = kX86_64. - 0, + arraysize(kUnsupportedLambdaOpcodes), // 6 = kMips. - 0, + arraysize(kUnsupportedLambdaOpcodes), // 7 = kMips64. - 0 + arraysize(kUnsupportedLambdaOpcodes), }; static_assert(sizeof(kUnsupportedOpcodesSize) == 8 * sizeof(size_t), "kUnsupportedOpcodesSize unexpected"); +static bool IsUnsupportedExperimentalLambdasOnly(size_t i) { + DCHECK_LE(i, arraysize(kUnsupportedOpcodes)); + return kUnsupportedOpcodes[i] == kUnsupportedLambdaOpcodes; +} + // The maximum amount of Dalvik register in a method for which we will start compiling. Tries to // avoid an abort when we need to manage more SSA registers than we can. 
static constexpr size_t kMaxAllowedDalvikRegisters = INT16_MAX / 2; @@ -487,6 +500,30 @@ static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) return true; } +// If the ISA has unsupported opcodes, should we skip scanning over them? +// +// Most of the time we're compiling non-experimental files, so scanning just slows +// performance down by as much as 6% with 4 threads. +// In the rare cases we compile experimental opcodes, the runtime has an option to enable it, +// which will force scanning for any unsupported opcodes. +static bool SkipScanningUnsupportedOpcodes(InstructionSet instruction_set) { + if (UNLIKELY(kUnsupportedOpcodesSize[instruction_set] == 0U)) { + // All opcodes are supported no matter what. Usually not the case + // since experimental opcodes are not implemented in the quick compiler. + return true; + } else if (LIKELY(!Runtime::Current()->AreExperimentalLambdasEnabled())) { + // Experimental opcodes are disabled. + // + // If all unsupported opcodes are experimental we don't need to do scanning. + return IsUnsupportedExperimentalLambdasOnly(instruction_set); + } else { + // Experimental opcodes are enabled. + // + // Do the opcode scanning if the ISA has any unsupported opcodes. + return false; + } +} + // Skip the method that we do not support currently. bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, CompilationUnit* cu) const { @@ -498,7 +535,7 @@ bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_fil // Check whether we do have limitations at all. if (kSupportedTypes[cu->instruction_set] == nullptr && - kUnsupportedOpcodesSize[cu->instruction_set] == 0U) { + SkipScanningUnsupportedOpcodes(cu->instruction_set)) { return true; } diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc index 6d485986f6..8a009cb3e5 100644 --- a/compiler/dex/verified_method.cc +++ b/compiler/dex/verified_method.cc @@ -89,14 +89,15 @@ bool VerifiedMethod::GenerateGcMap(verifier::MethodVerifier* method_verifier) { DCHECK(dex_gc_map_.empty()); size_t num_entries, ref_bitmap_bits, pc_bits; ComputeGcMapSizes(method_verifier, &num_entries, &ref_bitmap_bits, &pc_bits); - // There's a single byte to encode the size of each bitmap. - if (ref_bitmap_bits >= kBitsPerByte * 8192 /* 13-bit size */) { + const size_t ref_bitmap_bytes = RoundUp(ref_bitmap_bits, kBitsPerByte) / kBitsPerByte; + static constexpr size_t kFormatBits = 3; + // We have 16 - kFormatBits available for the ref_bitmap_bytes. + if ((ref_bitmap_bytes >> (16u - kFormatBits)) != 0) { LOG(WARNING) << "Cannot encode GC map for method with " << ref_bitmap_bits << " registers: " << PrettyMethod(method_verifier->GetMethodReference().dex_method_index, *method_verifier->GetMethodReference().dex_file); return false; } - size_t ref_bitmap_bytes = RoundUp(ref_bitmap_bits, kBitsPerByte) / kBitsPerByte; // There are 2 bytes to encode the number of entries. if (num_entries > std::numeric_limits<uint16_t>::max()) { LOG(WARNING) << "Cannot encode GC map for method with " << num_entries << " entries: " @@ -122,7 +123,7 @@ bool VerifiedMethod::GenerateGcMap(verifier::MethodVerifier* method_verifier) { size_t table_size = ((pc_bytes + ref_bitmap_bytes) * num_entries) + 4; dex_gc_map_.reserve(table_size); // Write table header. 
- dex_gc_map_.push_back(format | ((ref_bitmap_bytes & ~0xFF) >> 5)); + dex_gc_map_.push_back(format | ((ref_bitmap_bytes & ~0xFF) >> (kBitsPerByte - kFormatBits))); dex_gc_map_.push_back(ref_bitmap_bytes & 0xFF); dex_gc_map_.push_back(num_entries & 0xFF); dex_gc_map_.push_back((num_entries >> 8) & 0xFF); @@ -147,7 +148,7 @@ void VerifiedMethod::VerifyGcMap(verifier::MethodVerifier* method_verifier, // Check that for every GC point there is a map entry, there aren't entries for non-GC points, // that the table data is well formed and all references are marked (or not) in the bitmap. verifier::DexPcToReferenceMap map(&data[0]); - DCHECK_EQ(data.size(), map.RawSize()); + CHECK_EQ(data.size(), map.RawSize()) << map.NumEntries() << " " << map.RegWidth(); size_t map_index = 0; const DexFile::CodeItem* code_item = method_verifier->CodeItem(); for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) { diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h index e0c56fcc82..633bc1b1be 100644 --- a/compiler/driver/compiler_driver-inl.h +++ b/compiler/driver/compiler_driver-inl.h @@ -250,8 +250,10 @@ inline bool CompilerDriver::CanAssumeClassIsInitialized(mirror::Class* klass) { inline bool CompilerDriver::CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, mirror::Class* klass) { - return (referrer_class != nullptr && referrer_class->IsSubClass(klass)) || - CanAssumeClassIsInitialized(klass); + return (referrer_class != nullptr + && !referrer_class->IsInterface() + && referrer_class->IsSubClass(klass)) + || CanAssumeClassIsInitialized(klass); } inline bool CompilerDriver::IsStaticFieldsClassInitialized(mirror::Class* referrer_class, diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 84b6a52bda..4cdf75bba4 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1410,7 +1410,9 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType is_in_image = IsImageClass(method->GetDeclaringClassDescriptor()); } else { is_in_image = instruction_set_ != kX86 && instruction_set_ != kX86_64 && - heap->FindSpaceFromObject(method->GetDeclaringClass(), false)->IsImageSpace(); + heap->FindSpaceFromObject(method->GetDeclaringClass(), false)->IsImageSpace() && + !cl->IsQuickToInterpreterBridge( + reinterpret_cast<const void*>(compiler_->GetEntryPointOf(method))); } if (!is_in_image) { // We can only branch directly to Methods that are resolved in the DexCache. diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h index ad315ee351..ae57755f43 100644 --- a/compiler/dwarf/headers.h +++ b/compiler/dwarf/headers.h @@ -54,9 +54,19 @@ void WriteDebugFrameCIE(bool is64bit, writer.PushUleb128(return_address_register.num()); // ubyte in DWARF2. writer.PushUleb128(1); // z: Augmentation data size. if (is64bit) { - writer.PushUint8(address_type | DW_EH_PE_udata8); // R: Pointer encoding. + if (address_type == DW_EH_PE_pcrel) { + writer.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata8); // R: Pointer encoding. + } else { + DCHECK(address_type == DW_EH_PE_absptr); + writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata8); // R: Pointer encoding. + } } else { - writer.PushUint8(address_type | DW_EH_PE_udata4); // R: Pointer encoding. + if (address_type == DW_EH_PE_pcrel) { + writer.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata4); // R: Pointer encoding. 
+ } else { + DCHECK(address_type == DW_EH_PE_absptr); + writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata4); // R: Pointer encoding. + } } writer.PushData(opcodes.data()); writer.Pad(is64bit ? 8 : 4); diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 97b3725da1..900dabea0e 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1759,7 +1759,9 @@ class BCEVisitor : public HGraphVisitor { ValueBound lower_bound = range->GetLower(); DCHECK(lower_bound.IsConstant()); DCHECK(const_instr->GetValue() <= kMaxConstantForAddingDeoptimize); - DCHECK_EQ(lower_bound.GetConstant(), const_instr->GetValue() + 1); + // Note that the lower bound of the array length may have been refined + // through other instructions (such as `HNewArray(length - 4)`). + DCHECK_LE(const_instr->GetValue() + 1, lower_bound.GetConstant()); // If array_length is less than lower_const, deoptimize. HBoundsCheck* bounds_check = first_constant_index_bounds_check_map_.Get( diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 793dd28157..c49752642b 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -483,10 +483,11 @@ void HGraphBuilder::Binop_23x_shift(const Instruction& instruction, void HGraphBuilder::Binop_23x_cmp(const Instruction& instruction, Primitive::Type type, - HCompare::Bias bias) { + HCompare::Bias bias, + uint32_t dex_pc) { HInstruction* first = LoadLocal(instruction.VRegB(), type); HInstruction* second = LoadLocal(instruction.VRegC(), type); - current_block_->AddInstruction(new (arena_) HCompare(type, first, second, bias)); + current_block_->AddInstruction(new (arena_) HCompare(type, first, second, bias, dex_pc)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } @@ -661,7 +662,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, (target_method.dex_method_index == outer_compilation_unit_->GetDexMethodIndex()) && (target_method.dex_file == outer_compilation_unit_->GetDexFile()); - if (optimized_invoke_type == kStatic) { + if (optimized_invoke_type == kStatic && !is_string_init) { ScopedObjectAccess soa(Thread::Current()); StackHandleScope<4> hs(soa.Self()); Handle<mirror::DexCache> dex_cache(hs.NewHandle( @@ -680,26 +681,30 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile(); Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( outer_compilation_unit_->GetClassLinker()->FindDexCache(outer_dex_file))); - Handle<mirror::Class> referrer_class(hs.NewHandle(GetOutermostCompilingClass())); + Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass())); // The index at which the method's class is stored in the DexCache's type array. uint32_t storage_index = DexFile::kDexNoIndex; - bool is_referrer_class = (resolved_method->GetDeclaringClass() == referrer_class.Get()); - if (is_referrer_class) { - storage_index = referrer_class->GetDexTypeIndex(); + bool is_outer_class = (resolved_method->GetDeclaringClass() == outer_class.Get()); + if (is_outer_class) { + storage_index = outer_class->GetDexTypeIndex(); } else if (outer_dex_cache.Get() == dex_cache.Get()) { // Get `storage_index` from IsClassOfStaticMethodAvailableToReferrer. 
compiler_driver_->IsClassOfStaticMethodAvailableToReferrer(outer_dex_cache.Get(), - referrer_class.Get(), + GetCompilingClass(), resolved_method, method_idx, &storage_index); } - if (referrer_class.Get()->IsSubClass(resolved_method->GetDeclaringClass())) { - // If the referrer class is the declaring class or a subclass + if (!outer_class->IsInterface() + && outer_class->IsSubClass(resolved_method->GetDeclaringClass())) { + // If the outer class is the declaring class or a subclass // of the declaring class, no class initialization is needed // before the static method call. + // Note that in case of inlining, we do not need to add clinit checks + // to calls that satisfy this subclass check with any inlined methods. This + // will be detected by the optimization passes. clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; } else if (storage_index != DexFile::kDexNoIndex) { // If the method's class type index is available, check @@ -720,7 +725,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, graph_->GetCurrentMethod(), storage_index, *dex_compilation_unit_->GetDexFile(), - is_referrer_class, + is_outer_class, dex_pc); current_block_->AddInstruction(load_class); clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); @@ -809,6 +814,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, // Add move-result for StringFactory method. if (is_string_init) { uint32_t orig_this_reg = is_range ? register_index : args[0]; + UpdateLocal(orig_this_reg, invoke); const VerifiedMethod* verified_method = compiler_driver_->GetVerifiedMethod(dex_file_, dex_compilation_unit_->GetDexMethodIndex()); if (verified_method == nullptr) { @@ -822,10 +828,10 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, if (map_it != string_init_map.end()) { std::set<uint32_t> reg_set = map_it->second; for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) { - UpdateLocal(*set_it, invoke); + HInstruction* load_local = LoadLocal(orig_this_reg, Primitive::kPrimNot); + UpdateLocal(*set_it, load_local); } } - UpdateLocal(orig_this_reg, invoke); } return true; } @@ -878,17 +884,25 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction, return true; } -mirror::Class* HGraphBuilder::GetOutermostCompilingClass() const { +static mirror::Class* GetClassFrom(CompilerDriver* driver, + const DexCompilationUnit& compilation_unit) { ScopedObjectAccess soa(Thread::Current()); StackHandleScope<2> hs(soa.Self()); - const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile(); + const DexFile& dex_file = *compilation_unit.GetDexFile(); Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); - Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( - outer_compilation_unit_->GetClassLinker()->FindDexCache(outer_dex_file))); + soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader()))); + Handle<mirror::DexCache> dex_cache(hs.NewHandle( + compilation_unit.GetClassLinker()->FindDexCache(dex_file))); + + return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit); +} + +mirror::Class* HGraphBuilder::GetOutermostCompilingClass() const { + return GetClassFrom(compiler_driver_, *outer_compilation_unit_); +} - return compiler_driver_->ResolveCompilingMethodsClass( - soa, outer_dex_cache, class_loader, outer_compilation_unit_); +mirror::Class* HGraphBuilder::GetCompilingClass() const { + return 
GetClassFrom(compiler_driver_, *dex_compilation_unit_); } bool HGraphBuilder::IsOutermostCompilingClass(uint16_t type_index) const { @@ -900,9 +914,9 @@ bool HGraphBuilder::IsOutermostCompilingClass(uint16_t type_index) const { soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass( soa, dex_cache, class_loader, type_index, dex_compilation_unit_))); - Handle<mirror::Class> compiling_class(hs.NewHandle(GetOutermostCompilingClass())); + Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass())); - return compiling_class.Get() == cls.Get(); + return outer_class.Get() == cls.Get(); } bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, @@ -928,20 +942,20 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile(); Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( outer_compilation_unit_->GetClassLinker()->FindDexCache(outer_dex_file))); - Handle<mirror::Class> referrer_class(hs.NewHandle(GetOutermostCompilingClass())); + Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass())); // The index at which the field's class is stored in the DexCache's type array. uint32_t storage_index; - bool is_referrer_class = (referrer_class.Get() == resolved_field->GetDeclaringClass()); - if (is_referrer_class) { - storage_index = referrer_class->GetDexTypeIndex(); + bool is_outer_class = (outer_class.Get() == resolved_field->GetDeclaringClass()); + if (is_outer_class) { + storage_index = outer_class->GetDexTypeIndex(); } else if (outer_dex_cache.Get() != dex_cache.Get()) { // The compiler driver cannot currently understand multiple dex caches involved. Just bailout. 
return false; } else { std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField( outer_dex_cache.Get(), - referrer_class.Get(), + GetCompilingClass(), resolved_field, field_index, &storage_index); @@ -959,12 +973,12 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(), storage_index, *dex_compilation_unit_->GetDexFile(), - is_referrer_class, + is_outer_class, dex_pc); current_block_->AddInstruction(constant); HInstruction* cls = constant; - if (!is_initialized && !is_referrer_class) { + if (!is_initialized && !is_outer_class) { cls = new (arena_) HClinitCheck(constant, dex_pc); current_block_->AddInstruction(cls); } @@ -1463,21 +1477,16 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 } case Instruction::RETURN: { - DCHECK_NE(return_type_, Primitive::kPrimNot); - DCHECK_NE(return_type_, Primitive::kPrimLong); - DCHECK_NE(return_type_, Primitive::kPrimDouble); BuildReturn(instruction, return_type_); break; } case Instruction::RETURN_OBJECT: { - DCHECK(return_type_ == Primitive::kPrimNot); BuildReturn(instruction, return_type_); break; } case Instruction::RETURN_WIDE: { - DCHECK(return_type_ == Primitive::kPrimDouble || return_type_ == Primitive::kPrimLong); BuildReturn(instruction, return_type_); break; } @@ -2105,27 +2114,27 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; case Instruction::CMP_LONG: { - Binop_23x_cmp(instruction, Primitive::kPrimLong, HCompare::kNoBias); + Binop_23x_cmp(instruction, Primitive::kPrimLong, HCompare::kNoBias, dex_pc); break; } case Instruction::CMPG_FLOAT: { - Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kGtBias); + Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kGtBias, dex_pc); break; } case Instruction::CMPG_DOUBLE: { - Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kGtBias); + Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kGtBias, dex_pc); break; } case Instruction::CMPL_FLOAT: { - Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kLtBias); + Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kLtBias, dex_pc); break; } case Instruction::CMPL_DOUBLE: { - Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kLtBias); + Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kLtBias, dex_pc); break; } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 36503ce43a..052aaf8b42 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -117,7 +117,10 @@ class HGraphBuilder : public ValueObject { template<typename T> void Binop_23x_shift(const Instruction& instruction, Primitive::Type type); - void Binop_23x_cmp(const Instruction& instruction, Primitive::Type type, HCompare::Bias bias); + void Binop_23x_cmp(const Instruction& instruction, + Primitive::Type type, + HCompare::Bias bias, + uint32_t dex_pc); template<typename T> void Binop_12x(const Instruction& instruction, Primitive::Type type); @@ -222,8 +225,12 @@ class HGraphBuilder : public ValueObject { void MaybeRecordStat(MethodCompilationStat compilation_stat); + // Returns the outer-most compiling method's class. mirror::Class* GetOutermostCompilingClass() const; + // Returns the class whose method is being compiled. + mirror::Class* GetCompilingClass() const; + // Returns whether `type_index` points to the outer-most compiling method's class. 
bool IsOutermostCompilingClass(uint16_t type_index) const; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 09f7d86605..cd10935806 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -20,10 +20,12 @@ #include "code_generator_arm64.h" #include "code_generator_x86.h" #include "code_generator_x86_64.h" +#include "code_generator_mips64.h" #include "compiled_method.h" #include "dex/verified_method.h" #include "driver/dex_compilation_unit.h" #include "gc_map_builder.h" +#include "graph_visualizer.h" #include "leb128.h" #include "mapping_table.h" #include "mirror/array-inl.h" @@ -158,12 +160,55 @@ HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const { return block; } +class DisassemblyScope { + public: + DisassemblyScope(HInstruction* instruction, const CodeGenerator& codegen) + : codegen_(codegen), instruction_(instruction), start_offset_(static_cast<size_t>(-1)) { + if (codegen_.GetDisassemblyInformation() != nullptr) { + start_offset_ = codegen_.GetAssembler().CodeSize(); + } + } + + ~DisassemblyScope() { + // We avoid building this data when we know it will not be used. + if (codegen_.GetDisassemblyInformation() != nullptr) { + codegen_.GetDisassemblyInformation()->AddInstructionInterval( + instruction_, start_offset_, codegen_.GetAssembler().CodeSize()); + } + } + + private: + const CodeGenerator& codegen_; + HInstruction* instruction_; + size_t start_offset_; +}; + + +void CodeGenerator::GenerateSlowPaths() { + size_t code_start = 0; + for (size_t i = 0, e = slow_paths_.Size(); i < e; ++i) { + if (disasm_info_ != nullptr) { + code_start = GetAssembler()->CodeSize(); + } + slow_paths_.Get(i)->EmitNativeCode(this); + if (disasm_info_ != nullptr) { + disasm_info_->AddSlowPathInterval(slow_paths_.Get(i), code_start, GetAssembler()->CodeSize()); + } + } +} + void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) { is_baseline_ = is_baseline; HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); + + size_t frame_start = GetAssembler()->CodeSize(); GenerateFrameEntry(); DCHECK_EQ(GetAssembler()->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size_)); + if (disasm_info_ != nullptr) { + disasm_info_->SetFrameEntryInterval(frame_start, GetAssembler()->CodeSize()); + } + for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) { HBasicBlock* block = block_order_->Get(current_block_index_); // Don't generate code for an empty block. Its predecessors will branch to its successor @@ -173,6 +218,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) Bind(block); for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); + DisassemblyScope disassembly_scope(current, *this); if (is_baseline) { InitLocationsBaseline(current); } @@ -181,10 +227,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) } } - // Generate the slow paths. 
- for (size_t i = 0, e = slow_paths_.Size(); i < e; ++i) { - slow_paths_.Get(i)->EmitNativeCode(this); - } + GenerateSlowPaths(); // Finalize instructions in assember; Finalize(allocator); @@ -486,6 +529,11 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph, } case kMips: return nullptr; + case kMips64: { + return new mips64::CodeGeneratorMIPS64(graph, + *isa_features.AsMips64InstructionSetFeatures(), + compiler_options); + } case kX86: { return new x86::CodeGeneratorX86(graph, *isa_features.AsX86InstructionSetFeatures(), @@ -651,18 +699,18 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path) { if (instruction != nullptr) { - // The code generated for some type conversions may call the - // runtime, thus normally requiring a subsequent call to this - // method. However, the method verifier does not produce PC - // information for certain instructions, which are considered "atomic" - // (they cannot join a GC). + // The code generated for some type conversions and comparisons + // may call the runtime, thus normally requiring a subsequent + // call to this method. However, the method verifier does not + // produce PC information for certain instructions, which are + // considered "atomic" (they cannot join a GC). // Therefore we do not currently record PC information for such // instructions. As this may change later, we added this special // case so that code generators may nevertheless call // CodeGenerator::RecordPcInfo without triggering an error in // CodeGenerator::BuildNativeGCMap ("Missing ref for dex pc 0x") // thereafter. - if (instruction->IsTypeConversion()) { + if (instruction->IsTypeConversion() || instruction->IsCompare()) { return; } if (instruction->IsRem()) { diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 5b0abd76b3..4cecd61365 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -22,6 +22,7 @@ #include "base/bit_field.h" #include "driver/compiler_options.h" #include "globals.h" +#include "graph_visualizer.h" #include "locations.h" #include "memory_region.h" #include "nodes.h" @@ -97,6 +98,8 @@ class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> { return saved_fpu_stack_offsets_[reg]; } + virtual const char* GetDescription() const = 0; + protected: static constexpr size_t kMaximumNumberOfExpectedRegisters = 32; static constexpr uint32_t kRegisterNotSaved = -1; @@ -160,6 +163,7 @@ class CodeGenerator { virtual void Bind(HBasicBlock* block) = 0; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0; virtual Assembler* GetAssembler() = 0; + virtual const Assembler& GetAssembler() const = 0; virtual size_t GetWordSize() const = 0; virtual size_t GetFloatingPointSpillSlotSize() const = 0; virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0; @@ -338,6 +342,9 @@ class CodeGenerator { static void CreateCommonInvokeLocationSummary( HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor); + void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; } + DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; } + protected: CodeGenerator(HGraph* graph, size_t number_of_core_registers, @@ -361,6 +368,7 @@ class CodeGenerator { stack_map_stream_(graph->GetArena()), block_order_(nullptr), is_baseline_(false), + disasm_info_(nullptr), graph_(graph), compiler_options_(compiler_options), slow_paths_(graph->GetArena(), 8), @@ -444,9 
+452,12 @@ class CodeGenerator { // Whether we are using baseline. bool is_baseline_; + DisassemblyInformation* disasm_info_; + private: void InitLocationsBaseline(HInstruction* instruction); size_t GetStackOffsetOfSavedRegister(size_t index); + void GenerateSlowPaths(); void CompileInternal(CodeAllocator* allocator, bool is_baseline); void BlockIfInRegister(Location location, bool is_out = false) const; void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index bb5debba29..9abe2e7be4 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -69,6 +69,8 @@ class NullCheckSlowPathARM : public SlowPathCodeARM { QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this); } + const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM"; } + private: HNullCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM); @@ -85,6 +87,8 @@ class DivZeroCheckSlowPathARM : public SlowPathCodeARM { QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this); } + const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM"; } + private: HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM); @@ -118,6 +122,8 @@ class SuspendCheckSlowPathARM : public SlowPathCodeARM { return successor_; } + const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM"; } + private: HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. @@ -155,6 +161,8 @@ class BoundsCheckSlowPathARM : public SlowPathCodeARM { QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this); } + const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM"; } + private: HBoundsCheck* const instruction_; const Location index_location_; @@ -197,6 +205,8 @@ class LoadClassSlowPathARM : public SlowPathCodeARM { __ b(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM"; } + private: // The class this slow path will load. HLoadClass* const cls_; @@ -236,6 +246,8 @@ class LoadStringSlowPathARM : public SlowPathCodeARM { __ b(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM"; } + private: HLoadString* const instruction_; @@ -286,6 +298,8 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM { __ b(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM"; } + private: HInstruction* const instruction_; const Location class_to_check_; @@ -310,6 +324,8 @@ class DeoptimizationSlowPathARM : public SlowPathCodeARM { arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); } + const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; } + private: HInstruction* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM); @@ -420,6 +436,20 @@ void CodeGeneratorARM::Finalize(CodeAllocator* allocator) { __ AdjustLabelPosition(block_label); } } + // Adjust pc offsets for the disassembly information. 
+ if (disasm_info_ != nullptr) { + GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval(); + frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start); + frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end); + for (auto& it : *disasm_info_->GetInstructionIntervals()) { + it.second.start = __ GetAdjustedPosition(it.second.start); + it.second.end = __ GetAdjustedPosition(it.second.end); + } + for (auto& it : *disasm_info_->GetSlowPathIntervals()) { + it.code_interval.start = __ GetAdjustedPosition(it.code_interval.start); + it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end); + } + } CodeGenerator::Finalize(allocator); } @@ -2586,7 +2616,9 @@ void LocationsBuilderARM::HandleShift(HBinaryOperation* op) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(op->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // Make the output overlap, as it will be used to hold the masked + // second input. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); break; } case Primitive::kPrimLong: { @@ -2617,13 +2649,13 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { // Arm doesn't mask the shift count so we need to do it ourselves. if (second.IsRegister()) { Register second_reg = second.AsRegister<Register>(); - __ and_(second_reg, second_reg, ShifterOperand(kMaxIntShiftValue)); + __ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftValue)); if (op->IsShl()) { - __ Lsl(out_reg, first_reg, second_reg); + __ Lsl(out_reg, first_reg, out_reg); } else if (op->IsShr()) { - __ Asr(out_reg, first_reg, second_reg); + __ Asr(out_reg, first_reg, out_reg); } else { - __ Lsr(out_reg, first_reg, second_reg); + __ Lsr(out_reg, first_reg, out_reg); } } else { int32_t cst = second.GetConstant()->AsIntConstant()->GetValue(); @@ -2652,44 +2684,44 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { Register second_reg = second.AsRegister<Register>(); if (op->IsShl()) { + __ and_(o_l, second_reg, ShifterOperand(kMaxLongShiftValue)); // Shift the high part - __ and_(second_reg, second_reg, ShifterOperand(63)); - __ Lsl(o_h, high, second_reg); + __ Lsl(o_h, high, o_l); // Shift the low part and `or` what overflew on the high part - __ rsb(temp, second_reg, ShifterOperand(32)); + __ rsb(temp, o_l, ShifterOperand(kArmBitsPerWord)); __ Lsr(temp, low, temp); __ orr(o_h, o_h, ShifterOperand(temp)); // If the shift is > 32 bits, override the high part - __ subs(temp, second_reg, ShifterOperand(32)); + __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord)); __ it(PL); __ Lsl(o_h, low, temp, false, PL); // Shift the low part - __ Lsl(o_l, low, second_reg); + __ Lsl(o_l, low, o_l); } else if (op->IsShr()) { + __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue)); // Shift the low part - __ and_(second_reg, second_reg, ShifterOperand(63)); - __ Lsr(o_l, low, second_reg); + __ Lsr(o_l, low, o_h); // Shift the high part and `or` what underflew on the low part - __ rsb(temp, second_reg, ShifterOperand(32)); + __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord)); __ Lsl(temp, high, temp); __ orr(o_l, o_l, ShifterOperand(temp)); // If the shift is > 32 bits, override the low part - __ subs(temp, second_reg, ShifterOperand(32)); + __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord)); __ it(PL); __ Asr(o_l, high, temp, false, PL); // Shift the high part - __ 
Asr(o_h, high, second_reg); + __ Asr(o_h, high, o_h); } else { + __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue)); // same as Shr except we use `Lsr`s and not `Asr`s - __ and_(second_reg, second_reg, ShifterOperand(63)); - __ Lsr(o_l, low, second_reg); - __ rsb(temp, second_reg, ShifterOperand(32)); + __ Lsr(o_l, low, o_h); + __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord)); __ Lsl(temp, high, temp); __ orr(o_l, o_l, ShifterOperand(temp)); - __ subs(temp, second_reg, ShifterOperand(32)); + __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord)); __ it(PL); __ Lsr(o_l, high, temp, false, PL); - __ Lsr(o_h, high, second_reg); + __ Lsr(o_h, high, o_h); } break; } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 1599a23568..953e733c44 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -32,6 +32,7 @@ class SlowPathCodeARM; // Use a local definition to prevent copying mistakes. static constexpr size_t kArmWordSize = kArmPointerSize; +static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte; static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 }; static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); @@ -254,6 +255,10 @@ class CodeGeneratorARM : public CodeGenerator { return &assembler_; } + const ArmAssembler& GetAssembler() const OVERRIDE { + return assembler_; + } + uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE { return GetLabelOf(block)->Position(); } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 3c8f117011..7ec6b54285 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -213,6 +213,8 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } + const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; } + private: HBoundsCheck* const instruction_; const Location index_location_; @@ -233,6 +235,8 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } + const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; } + private: HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64); @@ -278,6 +282,8 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; } + private: // The class this slow path will load. HLoadClass* const cls_; @@ -319,6 +325,8 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; } + private: HLoadString* const instruction_; @@ -337,6 +345,8 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } + const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; } + private: HNullCheck* const instruction_; @@ -373,6 +383,8 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { return successor_; } + const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; } + private: HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. 
@@ -429,6 +441,8 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; } + private: HInstruction* const instruction_; const Location class_to_check_; @@ -453,6 +467,8 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); } + const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } + private: HInstruction* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64); diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index f96810ff80..bbe3adc76b 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -283,6 +283,7 @@ class CodeGeneratorARM64 : public CodeGenerator { HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; } + const Arm64Assembler& GetAssembler() const OVERRIDE { return assembler_; } vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; } // Emit a write barrier. diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc new file mode 100644 index 0000000000..ab684d41d4 --- /dev/null +++ b/compiler/optimizing/code_generator_mips64.cc @@ -0,0 +1,3282 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_mips64.h" + +#include "entrypoints/quick/quick_entrypoints.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" +#include "gc/accounting/card_table.h" +#include "intrinsics.h" +#include "art_method.h" +#include "mirror/array-inl.h" +#include "mirror/class-inl.h" +#include "offsets.h" +#include "thread.h" +#include "utils/mips64/assembler_mips64.h" +#include "utils/assembler.h" +#include "utils/stack_checks.h" + +namespace art { +namespace mips64 { + +static constexpr int kCurrentMethodStackOffset = 0; +static constexpr GpuRegister kMethodRegisterArgument = A0; + +// We need extra temporary/scratch registers (in addition to AT) in some cases. +static constexpr GpuRegister TMP = T8; +static constexpr FpuRegister FTMP = F8; + +// ART Thread Register. 
+static constexpr GpuRegister TR = S1; + +Location Mips64ReturnLocation(Primitive::Type return_type) { + switch (return_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: + case Primitive::kPrimLong: + return Location::RegisterLocation(V0); + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + return Location::FpuRegisterLocation(F0); + + case Primitive::kPrimVoid: + return Location(); + } + UNREACHABLE(); +} + +Location InvokeDexCallingConventionVisitorMIPS64::GetReturnLocation(Primitive::Type type) const { + return Mips64ReturnLocation(type); +} + +Location InvokeDexCallingConventionVisitorMIPS64::GetMethodLocation() const { + return Location::RegisterLocation(kMethodRegisterArgument); +} + +Location InvokeDexCallingConventionVisitorMIPS64::GetNextLocation(Primitive::Type type) { + Location next_location; + if (type == Primitive::kPrimVoid) { + LOG(FATAL) << "Unexpected parameter type " << type; + } + + if (Primitive::IsFloatingPointType(type) && + (float_index_ < calling_convention.GetNumberOfFpuRegisters())) { + next_location = Location::FpuRegisterLocation( + calling_convention.GetFpuRegisterAt(float_index_++)); + gp_index_++; + } else if (!Primitive::IsFloatingPointType(type) && + (gp_index_ < calling_convention.GetNumberOfRegisters())) { + next_location = Location::RegisterLocation(calling_convention.GetRegisterAt(gp_index_++)); + float_index_++; + } else { + size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_); + next_location = Primitive::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) + : Location::StackSlot(stack_offset); + } + + // Space on the stack is reserved for all arguments. + stack_index_ += Primitive::Is64BitType(type) ? 2 : 1; + + // TODO: review + + // TODO: shouldn't we use a whole machine word per argument on the stack? + // Implicit 4-byte method pointer (and such) will cause misalignment. + + return next_location; +} + +Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type) { + return Mips64ReturnLocation(type); +} + +#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> +#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value() + +class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction, + Location index_location, + Location length_location) + : instruction_(instruction), + index_location_(index_location), + length_location_(length_location) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + __ Bind(GetEntryLabel()); + // We're moving two locations to locations that could overlap, so we need a parallel + // move resolver. 
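// Why a parallel move resolver: `index_location_` and `length_location_` may
// already sit in the argument registers the runtime call expects, possibly in
// swapped order, so two independent moves could clobber one value before it
// is read. EmitParallelMoves() treats both transfers as a single set and
// introduces a temporary (or a swap) whenever the source of one move is the
// destination of the other.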
+ InvokeRuntimeCallingConvention calling_convention; + codegen->EmitParallelMoves(index_location_, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimInt, + length_location_, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimInt); + mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); + } + + const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS64"; } + + private: + HBoundsCheck* const instruction_; + const Location index_location_; + const Location length_location_; + + DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS64); +}; + +class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + __ Bind(GetEntryLabel()); + mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); + } + + const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS64"; } + + private: + HDivZeroCheck* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS64); +}; + +class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + LoadClassSlowPathMIPS64(HLoadClass* cls, + HInstruction* at, + uint32_t dex_pc, + bool do_clinit) + : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = at_->GetLocations(); + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + __ LoadConst32(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex()); + int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) + : QUICK_ENTRY_POINT(pInitializeType); + mips64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } + + // Move the class to the desired location. + Location out = locations->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + Primitive::Type type = at_->GetType(); + mips64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); + } + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; } + + private: + // The class this slow path will load. + HLoadClass* const cls_; + + // The instruction where this slow path is happening. + // (Might be the load class or an initialization check). + HInstruction* const at_; + + // The dex PC of `at_`. + const uint32_t dex_pc_; + + // Whether to initialize the class. 
+ const bool do_clinit_; + + DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS64); +}; + +class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + explicit LoadStringSlowPathMIPS64(HLoadString* instruction) : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + __ LoadConst32(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); + mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + Primitive::Type type = instruction_->GetType(); + mips64_codegen->MoveLocation(locations->Out(), + calling_convention.GetReturnLocation(type), + type); + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; } + + private: + HLoadString* const instruction_; + + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS64); +}; + +class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + explicit NullCheckSlowPathMIPS64(HNullCheck* instr) : instruction_(instr) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + __ Bind(GetEntryLabel()); + mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); + } + + const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathMIPS64"; } + + private: + HNullCheck* const instruction_; + + DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS64); +}; + +class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + explicit SuspendCheckSlowPathMIPS64(HSuspendCheck* instruction, + HBasicBlock* successor) + : instruction_(instruction), successor_(successor) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); + RestoreLiveRegisters(codegen, instruction_->GetLocations()); + if (successor_ == nullptr) { + __ B(GetReturnLabel()); + } else { + __ B(mips64_codegen->GetLabelOf(successor_)); + } + } + + Label* GetReturnLabel() { + DCHECK(successor_ == nullptr); + return &return_label_; + } + + const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS64"; } + + private: + HSuspendCheck* const instruction_; + // If not null, the block to branch to after the suspend check. + HBasicBlock* const successor_; + + // If `successor_` is null, the label to branch to after the suspend check. 
+ Label return_label_; + + DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS64); +}; + +class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + TypeCheckSlowPathMIPS64(HInstruction* instruction, + Location class_to_check, + Location object_class, + uint32_t dex_pc) + : instruction_(instruction), + class_to_check_(class_to_check), + object_class_(object_class), + dex_pc_(dex_pc) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(instruction_->IsCheckCast() + || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We're moving two locations to locations that could overlap, so we need a parallel + // move resolver. + InvokeRuntimeCallingConvention calling_convention; + codegen->EmitParallelMoves(class_to_check_, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + object_class_, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot); + + if (instruction_->IsInstanceOf()) { + mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), + instruction_, + dex_pc_, + this); + Primitive::Type ret_type = instruction_->GetType(); + Location ret_loc = calling_convention.GetReturnLocation(ret_type); + mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, + uint32_t, + const mirror::Class*, + const mirror::Class*>(); + } else { + DCHECK(instruction_->IsCheckCast()); + mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_, this); + CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); + } + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; } + + private: + HInstruction* const instruction_; + const Location class_to_check_; + const Location object_class_; + uint32_t dex_pc_; + + DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS64); +}; + +class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + explicit DeoptimizationSlowPathMIPS64(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + } + + const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64); +}; + +CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, + const Mips64InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) + : CodeGenerator(graph, + kNumberOfGpuRegisters, + kNumberOfFpuRegisters, + 0, // kNumberOfRegisterPairs + ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), + arraysize(kCoreCalleeSaves)), + ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), + 
arraysize(kFpuCalleeSaves)), + compiler_options), + block_labels_(graph->GetArena(), 0), + location_builder_(graph, this), + instruction_visitor_(graph, this), + move_resolver_(graph->GetArena(), this), + isa_features_(isa_features) { + // Save RA (containing the return address) to mimic Quick. + AddAllocatedRegister(Location::RegisterLocation(RA)); +} + +#undef __ +#define __ down_cast<Mips64Assembler*>(GetAssembler())-> +#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value() + +void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) { + CodeGenerator::Finalize(allocator); +} + +Mips64Assembler* ParallelMoveResolverMIPS64::GetAssembler() const { + return codegen_->GetAssembler(); +} + +void ParallelMoveResolverMIPS64::EmitMove(size_t index) { + MoveOperands* move = moves_.Get(index); + codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType()); +} + +void ParallelMoveResolverMIPS64::EmitSwap(size_t index) { + MoveOperands* move = moves_.Get(index); + codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType()); +} + +void ParallelMoveResolverMIPS64::RestoreScratch(int reg) { + // Pop reg + __ Ld(GpuRegister(reg), SP, 0); + __ DecreaseFrameSize(kMips64WordSize); +} + +void ParallelMoveResolverMIPS64::SpillScratch(int reg) { + // Push reg + __ IncreaseFrameSize(kMips64WordSize); + __ Sd(GpuRegister(reg), SP, 0); +} + +void ParallelMoveResolverMIPS64::Exchange(int index1, int index2, bool double_slot) { + LoadOperandType load_type = double_slot ? kLoadDoubleword : kLoadWord; + StoreOperandType store_type = double_slot ? kStoreDoubleword : kStoreWord; + // Allocate a scratch register other than TMP, if available. + // Else, spill V0 (arbitrary choice) and use it as a scratch register (it will be + // automatically unspilled when the scratch scope object is destroyed). + ScratchRegisterScope ensure_scratch(this, TMP, V0, codegen_->GetNumberOfCoreRegisters()); + // If V0 spills onto the stack, SP-relative offsets need to be adjusted. + int stack_offset = ensure_scratch.IsSpilled() ? kMips64WordSize : 0; + __ LoadFromOffset(load_type, + GpuRegister(ensure_scratch.GetRegister()), + SP, + index1 + stack_offset); + __ LoadFromOffset(load_type, + TMP, + SP, + index2 + stack_offset); + __ StoreToOffset(store_type, + GpuRegister(ensure_scratch.GetRegister()), + SP, + index2 + stack_offset); + __ StoreToOffset(store_type, TMP, SP, index1 + stack_offset); +} + +static dwarf::Reg DWARFReg(GpuRegister reg) { + return dwarf::Reg::Mips64Core(static_cast<int>(reg)); +} + +// TODO: mapping of floating-point registers to DWARF + +void CodeGeneratorMIPS64::GenerateFrameEntry() { + __ Bind(&frame_entry_label_); + + bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kMips64) || !IsLeafMethod(); + + if (do_overflow_check) { + __ LoadFromOffset(kLoadWord, + ZERO, + SP, + -static_cast<int32_t>(GetStackOverflowReservedBytes(kMips64))); + RecordPcInfo(nullptr, 0); + } + + // TODO: anything related to T9/GP/GOT/PIC/.so's? + + if (HasEmptyFrame()) { + return; + } + + // Make sure the frame size isn't unreasonably large. Per the various APIs + // it looks like it should always be less than 2GB in size, which allows + // us using 32-bit signed offsets from the stack pointer. + if (GetFrameSize() > 0x7FFFFFFF) + LOG(FATAL) << "Stack frame larger than 2GB"; + + // Spill callee-saved registers. + // Note that their cumulative size is small and they can be indexed using + // 16-bit offsets. 
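// Roughly, the frame this entry sequence builds looks like (higher addresses
// at the top; shown for illustration, derived from the code below):
//   SP + frame_size - 8            spilled core callee-saves (stored top-down)
//   ...                            spilled FPU callee-saves below them
//   SP + frame_size - spill_size   bottom of the callee-save area
//   ...                            spill slots / outgoing arguments
//   SP + 0                         ArtMethod* of the current method
// where frame_size is GetFrameSize() and spill_size is FrameEntrySpillSize().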
+ + // TODO: increment/decrement SP in one step instead of two or remove this comment. + + uint32_t ofs = FrameEntrySpillSize(); + __ IncreaseFrameSize(ofs); + + for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { + GpuRegister reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + ofs -= kMips64WordSize; + __ Sd(reg, SP, ofs); + __ cfi().RelOffset(DWARFReg(reg), ofs); + } + } + + for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { + FpuRegister reg = kFpuCalleeSaves[i]; + if (allocated_registers_.ContainsFloatingPointRegister(reg)) { + ofs -= kMips64WordSize; + __ Sdc1(reg, SP, ofs); + // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs); + } + } + + // Allocate the rest of the frame and store the current method pointer + // at its end. + + __ IncreaseFrameSize(GetFrameSize() - FrameEntrySpillSize()); + + static_assert(IsInt<16>(kCurrentMethodStackOffset), + "kCurrentMethodStackOffset must fit into int16_t"); + __ Sd(kMethodRegisterArgument, SP, kCurrentMethodStackOffset); +} + +void CodeGeneratorMIPS64::GenerateFrameExit() { + __ cfi().RememberState(); + + // TODO: anything related to T9/GP/GOT/PIC/.so's? + + if (!HasEmptyFrame()) { + // Deallocate the rest of the frame. + + __ DecreaseFrameSize(GetFrameSize() - FrameEntrySpillSize()); + + // Restore callee-saved registers. + // Note that their cumulative size is small and they can be indexed using + // 16-bit offsets. + + // TODO: increment/decrement SP in one step instead of two or remove this comment. + + uint32_t ofs = 0; + + for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { + FpuRegister reg = kFpuCalleeSaves[i]; + if (allocated_registers_.ContainsFloatingPointRegister(reg)) { + __ Ldc1(reg, SP, ofs); + ofs += kMips64WordSize; + // TODO: __ cfi().Restore(DWARFReg(reg)); + } + } + + for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { + GpuRegister reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + __ Ld(reg, SP, ofs); + ofs += kMips64WordSize; + __ cfi().Restore(DWARFReg(reg)); + } + } + + DCHECK_EQ(ofs, FrameEntrySpillSize()); + __ DecreaseFrameSize(ofs); + } + + __ Jr(RA); + + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(GetFrameSize()); +} + +void CodeGeneratorMIPS64::Bind(HBasicBlock* block) { + __ Bind(GetLabelOf(block)); +} + +void CodeGeneratorMIPS64::MoveLocation(Location destination, + Location source, + Primitive::Type type) { + if (source.Equals(destination)) { + return; + } + + // A valid move can always be inferred from the destination and source + // locations. When moving from and to a register, the argument type can be + // used to generate 32bit instead of 64bit moves. + bool unspecified_type = (type == Primitive::kPrimVoid); + DCHECK_EQ(unspecified_type, false); + + if (destination.IsRegister() || destination.IsFpuRegister()) { + if (unspecified_type) { + HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr; + if (source.IsStackSlot() || + (src_cst != nullptr && (src_cst->IsIntConstant() + || src_cst->IsFloatConstant() + || src_cst->IsNullConstant()))) { + // For stack slots and 32bit constants, a 64bit type is appropriate. + type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat; + } else { + // If the source is a double stack slot or a 64bit constant, a 64bit + // type is appropriate. Else the source is a register, and since the + // type has not been specified, we chose a 64bit type to force a 64bit + // move. + type = destination.IsRegister() ? 
Primitive::kPrimLong : Primitive::kPrimDouble; + } + } + DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(type)) || + (destination.IsRegister() && !Primitive::IsFloatingPointType(type))); + if (source.IsStackSlot() || source.IsDoubleStackSlot()) { + // Move to GPR/FPR from stack + LoadOperandType load_type = source.IsStackSlot() ? kLoadWord : kLoadDoubleword; + if (Primitive::IsFloatingPointType(type)) { + __ LoadFpuFromOffset(load_type, + destination.AsFpuRegister<FpuRegister>(), + SP, + source.GetStackIndex()); + } else { + // TODO: use load_type = kLoadUnsignedWord when type == Primitive::kPrimNot. + __ LoadFromOffset(load_type, + destination.AsRegister<GpuRegister>(), + SP, + source.GetStackIndex()); + } + } else if (source.IsConstant()) { + // Move to GPR/FPR from constant + GpuRegister gpr = AT; + if (!Primitive::IsFloatingPointType(type)) { + gpr = destination.AsRegister<GpuRegister>(); + } + if (type == Primitive::kPrimInt || type == Primitive::kPrimFloat) { + __ LoadConst32(gpr, GetInt32ValueOf(source.GetConstant()->AsConstant())); + } else { + __ LoadConst64(gpr, GetInt64ValueOf(source.GetConstant()->AsConstant())); + } + if (type == Primitive::kPrimFloat) { + __ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>()); + } else if (type == Primitive::kPrimDouble) { + __ Dmtc1(gpr, destination.AsFpuRegister<FpuRegister>()); + } + } else { + if (destination.IsRegister()) { + // Move to GPR from GPR + __ Move(destination.AsRegister<GpuRegister>(), source.AsRegister<GpuRegister>()); + } else { + // Move to FPR from FPR + if (type == Primitive::kPrimFloat) { + __ MovS(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>()); + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + __ MovD(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>()); + } + } + } + } else { // The destination is not a register. It must be a stack slot. + DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot()); + if (source.IsRegister() || source.IsFpuRegister()) { + if (unspecified_type) { + if (source.IsRegister()) { + type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong; + } else { + type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble; + } + } + DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(type)) && + (source.IsFpuRegister() == Primitive::IsFloatingPointType(type))); + // Move to stack from GPR/FPR + StoreOperandType store_type = destination.IsStackSlot() ? kStoreWord : kStoreDoubleword; + if (source.IsRegister()) { + __ StoreToOffset(store_type, + source.AsRegister<GpuRegister>(), + SP, + destination.GetStackIndex()); + } else { + __ StoreFpuToOffset(store_type, + source.AsFpuRegister<FpuRegister>(), + SP, + destination.GetStackIndex()); + } + } else if (source.IsConstant()) { + // Move to stack from constant + HConstant* src_cst = source.GetConstant(); + StoreOperandType store_type = destination.IsStackSlot() ? 
kStoreWord : kStoreDoubleword; + if (destination.IsStackSlot()) { + __ LoadConst32(TMP, GetInt32ValueOf(src_cst->AsConstant())); + } else { + __ LoadConst64(TMP, GetInt64ValueOf(src_cst->AsConstant())); + } + __ StoreToOffset(store_type, TMP, SP, destination.GetStackIndex()); + } else { + DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); + DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot()); + // Move to stack from stack + if (destination.IsStackSlot()) { + __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex()); + __ StoreToOffset(kStoreWord, TMP, SP, destination.GetStackIndex()); + } else { + __ LoadFromOffset(kLoadDoubleword, TMP, SP, source.GetStackIndex()); + __ StoreToOffset(kStoreDoubleword, TMP, SP, destination.GetStackIndex()); + } + } + } +} + +void CodeGeneratorMIPS64::SwapLocations(Location loc1, + Location loc2, + Primitive::Type type ATTRIBUTE_UNUSED) { + DCHECK(!loc1.IsConstant()); + DCHECK(!loc2.IsConstant()); + + if (loc1.Equals(loc2)) { + return; + } + + bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot(); + bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot(); + bool is_fp_reg1 = loc1.IsFpuRegister(); + bool is_fp_reg2 = loc2.IsFpuRegister(); + + if (loc2.IsRegister() && loc1.IsRegister()) { + // Swap 2 GPRs + GpuRegister r1 = loc1.AsRegister<GpuRegister>(); + GpuRegister r2 = loc2.AsRegister<GpuRegister>(); + __ Move(TMP, r2); + __ Move(r2, r1); + __ Move(r1, TMP); + } else if (is_fp_reg2 && is_fp_reg1) { + // Swap 2 FPRs + FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>(); + FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>(); + // TODO: Can MOV.S/MOV.D be used here to save one instruction? + // Need to distinguish float from double, right? + __ Dmfc1(TMP, r2); + __ Dmfc1(AT, r1); + __ Dmtc1(TMP, r1); + __ Dmtc1(AT, r2); + } else if (is_slot1 != is_slot2) { + // Swap GPR/FPR and stack slot + Location reg_loc = is_slot1 ? loc2 : loc1; + Location mem_loc = is_slot1 ? loc1 : loc2; + LoadOperandType load_type = mem_loc.IsStackSlot() ? kLoadWord : kLoadDoubleword; + StoreOperandType store_type = mem_loc.IsStackSlot() ? kStoreWord : kStoreDoubleword; + // TODO: use load_type = kLoadUnsignedWord when type == Primitive::kPrimNot. 
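// The register<->stack swap below needs only one extra temporary: the stack
// slot is first read into TMP, the register is then stored over the slot, and
// finally TMP is moved back into the register (via Mtc1/Dmtc1 for an FPR, or
// a plain Move for a GPR).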
+ __ LoadFromOffset(load_type, TMP, SP, mem_loc.GetStackIndex()); + if (reg_loc.IsFpuRegister()) { + __ StoreFpuToOffset(store_type, + reg_loc.AsFpuRegister<FpuRegister>(), + SP, + mem_loc.GetStackIndex()); + // TODO: review this MTC1/DMTC1 move + if (mem_loc.IsStackSlot()) { + __ Mtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>()); + } else { + DCHECK(mem_loc.IsDoubleStackSlot()); + __ Dmtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>()); + } + } else { + __ StoreToOffset(store_type, reg_loc.AsRegister<GpuRegister>(), SP, mem_loc.GetStackIndex()); + __ Move(reg_loc.AsRegister<GpuRegister>(), TMP); + } + } else if (is_slot1 && is_slot2) { + move_resolver_.Exchange(loc1.GetStackIndex(), + loc2.GetStackIndex(), + loc1.IsDoubleStackSlot()); + } else { + LOG(FATAL) << "Unimplemented swap between locations " << loc1 << " and " << loc2; + } +} + +void CodeGeneratorMIPS64::Move(HInstruction* instruction, + Location location, + HInstruction* move_for) { + LocationSummary* locations = instruction->GetLocations(); + Primitive::Type type = instruction->GetType(); + DCHECK_NE(type, Primitive::kPrimVoid); + + if (instruction->IsCurrentMethod()) { + MoveLocation(location, Location::DoubleStackSlot(kCurrentMethodStackOffset), type); + } else if (locations != nullptr && locations->Out().Equals(location)) { + return; + } else if (instruction->IsIntConstant() + || instruction->IsLongConstant() + || instruction->IsNullConstant()) { + if (location.IsRegister()) { + // Move to GPR from constant + GpuRegister dst = location.AsRegister<GpuRegister>(); + if (instruction->IsNullConstant() || instruction->IsIntConstant()) { + __ LoadConst32(dst, GetInt32ValueOf(instruction->AsConstant())); + } else { + __ LoadConst64(dst, instruction->AsLongConstant()->GetValue()); + } + } else { + DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot()); + // Move to stack from constant + if (location.IsStackSlot()) { + __ LoadConst32(TMP, GetInt32ValueOf(instruction->AsConstant())); + __ StoreToOffset(kStoreWord, TMP, SP, location.GetStackIndex()); + } else { + __ LoadConst64(TMP, instruction->AsLongConstant()->GetValue()); + __ StoreToOffset(kStoreDoubleword, TMP, SP, location.GetStackIndex()); + } + } + } else if (instruction->IsTemporary()) { + Location temp_location = GetTemporaryLocation(instruction->AsTemporary()); + MoveLocation(location, temp_location, type); + } else if (instruction->IsLoadLocal()) { + uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal()); + if (Primitive::Is64BitType(type)) { + MoveLocation(location, Location::DoubleStackSlot(stack_slot), type); + } else { + MoveLocation(location, Location::StackSlot(stack_slot), type); + } + } else { + DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary()); + MoveLocation(location, locations->Out(), type); + } +} + +Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const { + Primitive::Type type = load->GetType(); + + switch (type) { + case Primitive::kPrimNot: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + return Location::StackSlot(GetStackSlot(load->GetLocal())); + + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + return Location::DoubleStackSlot(GetStackSlot(load->GetLocal())); + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected type " << type; + } + + LOG(FATAL) << "Unreachable"; + return Location::NoLocation(); +} + +void 
CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) { + Label done; + GpuRegister card = AT; + GpuRegister temp = TMP; + __ Beqzc(value, &done); + __ LoadFromOffset(kLoadDoubleword, + card, + TR, + Thread::CardTableOffset<kMips64WordSize>().Int32Value()); + __ Dsrl(temp, object, gc::accounting::CardTable::kCardShift); + __ Daddu(temp, card, temp); + __ Sb(card, temp, 0); + __ Bind(&done); +} + +void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const { + // ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated. + blocked_core_registers_[ZERO] = true; + blocked_core_registers_[K0] = true; + blocked_core_registers_[K1] = true; + blocked_core_registers_[GP] = true; + blocked_core_registers_[SP] = true; + blocked_core_registers_[RA] = true; + + // AT and TMP(T8) are used as temporary/scratch registers + // (similar to how AT is used by MIPS assemblers). + blocked_core_registers_[AT] = true; + blocked_core_registers_[TMP] = true; + blocked_fpu_registers_[FTMP] = true; + + // Reserve suspend and thread registers. + blocked_core_registers_[S0] = true; + blocked_core_registers_[TR] = true; + + // Reserve T9 for function calls + blocked_core_registers_[T9] = true; + + // TODO: review; anything else? + + // TODO: make these two for's conditional on is_baseline once + // all the issues with register saving/restoring are sorted out. + for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { + blocked_core_registers_[kCoreCalleeSaves[i]] = true; + } + + for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { + blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; + } +} + +Location CodeGeneratorMIPS64::AllocateFreeRegister(Primitive::Type type) const { + if (type == Primitive::kPrimVoid) { + LOG(FATAL) << "Unreachable type " << type; + } + + if (Primitive::IsFloatingPointType(type)) { + size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFpuRegisters); + return Location::FpuRegisterLocation(reg); + } else { + size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfGpuRegisters); + return Location::RegisterLocation(reg); + } +} + +size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { + __ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index); + return kMips64WordSize; +} + +size_t CodeGeneratorMIPS64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { + __ LoadFromOffset(kLoadDoubleword, GpuRegister(reg_id), SP, stack_index); + return kMips64WordSize; +} + +size_t CodeGeneratorMIPS64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + __ StoreFpuToOffset(kStoreDoubleword, FpuRegister(reg_id), SP, stack_index); + return kMips64WordSize; +} + +size_t CodeGeneratorMIPS64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + __ LoadFpuFromOffset(kLoadDoubleword, FpuRegister(reg_id), SP, stack_index); + return kMips64WordSize; +} + +void CodeGeneratorMIPS64::DumpCoreRegister(std::ostream& stream, int reg) const { + stream << Mips64ManagedRegister::FromGpuRegister(GpuRegister(reg)); +} + +void CodeGeneratorMIPS64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { + stream << Mips64ManagedRegister::FromFpuRegister(FpuRegister(reg)); +} + +void CodeGeneratorMIPS64::InvokeRuntime(int32_t entry_point_offset, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path) { + // TODO: anything related to T9/GP/GOT/PIC/.so's? 
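// Quick entry points are function pointers stored at fixed offsets inside the
// Thread object, and TR (defined above as the ART thread register) holds the
// current Thread*. The call below therefore loads the target through TR,
// jumps via T9, and then records the native return PC so it can later be
// mapped back to a dex PC (e.g. for stack walking and exception delivery).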
+ __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); + __ Jalr(T9); + RecordPcInfo(instruction, dex_pc, slow_path); + DCHECK(instruction->IsSuspendCheck() + || instruction->IsBoundsCheck() + || instruction->IsNullCheck() + || instruction->IsDivZeroCheck() + || !IsLeafMethod()); +} + +void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, + GpuRegister class_reg) { + __ LoadFromOffset(kLoadWord, TMP, class_reg, mirror::Class::StatusOffset().Int32Value()); + __ LoadConst32(AT, mirror::Class::kStatusInitialized); + __ Bltc(TMP, AT, slow_path->GetEntryLabel()); + // TODO: barrier needed? + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { + __ Sync(0); // only stype 0 is supported +} + +void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruction, + HBasicBlock* successor) { + SuspendCheckSlowPathMIPS64* slow_path = + new (GetGraph()->GetArena()) SuspendCheckSlowPathMIPS64(instruction, successor); + codegen_->AddSlowPath(slow_path); + + __ LoadFromOffset(kLoadUnsignedHalfword, + TMP, + TR, + Thread::ThreadFlagsOffset<kMips64WordSize>().Int32Value()); + if (successor == nullptr) { + __ Bnezc(TMP, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetReturnLabel()); + } else { + __ Beqzc(TMP, codegen_->GetLabelOf(successor)); + __ B(slow_path->GetEntryLabel()); + // slow_path will return to GetLabelOf(successor). + } +} + +InstructionCodeGeneratorMIPS64::InstructionCodeGeneratorMIPS64(HGraph* graph, + CodeGeneratorMIPS64* codegen) + : HGraphVisitor(graph), + assembler_(codegen->GetAssembler()), + codegen_(codegen) {} + +void LocationsBuilderMIPS64::HandleBinaryOp(HBinaryOperation* instruction) { + DCHECK_EQ(instruction->InputCount(), 2U); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + Primitive::Type type = instruction->GetResultType(); + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + HInstruction* right = instruction->InputAt(1); + bool can_use_imm = false; + if (right->IsConstant()) { + int64_t imm = CodeGenerator::GetInt64ValueOf(right->AsConstant()); + if (instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) { + can_use_imm = IsUint<16>(imm); + } else if (instruction->IsAdd()) { + can_use_imm = IsInt<16>(imm); + } else { + DCHECK(instruction->IsSub()); + can_use_imm = IsInt<16>(-imm); + } + } + if (can_use_imm) + locations->SetInAt(1, Location::ConstantLocation(right->AsConstant())); + else + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } + break; + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected " << instruction->DebugName() << " type " << type; + } +} + +void InstructionCodeGeneratorMIPS64::HandleBinaryOp(HBinaryOperation* instruction) { + Primitive::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); + + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + GpuRegister lhs = 
locations->InAt(0).AsRegister<GpuRegister>(); + Location rhs_location = locations->InAt(1); + + GpuRegister rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); + } else { + rhs_reg = rhs_location.AsRegister<GpuRegister>(); + } + + if (instruction->IsAnd()) { + if (use_imm) + __ Andi(dst, lhs, rhs_imm); + else + __ And(dst, lhs, rhs_reg); + } else if (instruction->IsOr()) { + if (use_imm) + __ Ori(dst, lhs, rhs_imm); + else + __ Or(dst, lhs, rhs_reg); + } else if (instruction->IsXor()) { + if (use_imm) + __ Xori(dst, lhs, rhs_imm); + else + __ Xor(dst, lhs, rhs_reg); + } else if (instruction->IsAdd()) { + if (type == Primitive::kPrimInt) { + if (use_imm) + __ Addiu(dst, lhs, rhs_imm); + else + __ Addu(dst, lhs, rhs_reg); + } else { + if (use_imm) + __ Daddiu(dst, lhs, rhs_imm); + else + __ Daddu(dst, lhs, rhs_reg); + } + } else { + DCHECK(instruction->IsSub()); + if (type == Primitive::kPrimInt) { + if (use_imm) + __ Addiu(dst, lhs, -rhs_imm); + else + __ Subu(dst, lhs, rhs_reg); + } else { + if (use_imm) + __ Daddiu(dst, lhs, -rhs_imm); + else + __ Dsubu(dst, lhs, rhs_reg); + } + } + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + if (instruction->IsAdd()) { + if (type == Primitive::kPrimFloat) + __ AddS(dst, lhs, rhs); + else + __ AddD(dst, lhs, rhs); + } else if (instruction->IsSub()) { + if (type == Primitive::kPrimFloat) + __ SubS(dst, lhs, rhs); + else + __ SubD(dst, lhs, rhs); + } else { + LOG(FATAL) << "Unexpected floating-point binary operation"; + } + break; + } + default: + LOG(FATAL) << "Unexpected binary operation type " << type; + } +} + +void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); + Primitive::Type type = instr->GetResultType(); + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); + locations->SetOut(Location::RequiresRegister()); + break; + } + default: + LOG(FATAL) << "Unexpected shift type " << type; + } +} + +void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + LocationSummary* locations = instr->GetLocations(); + Primitive::Type type = instr->GetType(); + + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + Location rhs_location = locations->InAt(1); + + GpuRegister rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); + } else { + rhs_reg = rhs_location.AsRegister<GpuRegister>(); + } + + if (use_imm) { + uint32_t shift_value = (type == Primitive::kPrimInt) + ? 
static_cast<uint32_t>(rhs_imm & kMaxIntShiftValue) + : static_cast<uint32_t>(rhs_imm & kMaxLongShiftValue); + + if (type == Primitive::kPrimInt) { + if (instr->IsShl()) { + __ Sll(dst, lhs, shift_value); + } else if (instr->IsShr()) { + __ Sra(dst, lhs, shift_value); + } else { + __ Srl(dst, lhs, shift_value); + } + } else { + if (shift_value < 32) { + if (instr->IsShl()) { + __ Dsll(dst, lhs, shift_value); + } else if (instr->IsShr()) { + __ Dsra(dst, lhs, shift_value); + } else { + __ Dsrl(dst, lhs, shift_value); + } + } else { + shift_value -= 32; + if (instr->IsShl()) { + __ Dsll32(dst, lhs, shift_value); + } else if (instr->IsShr()) { + __ Dsra32(dst, lhs, shift_value); + } else { + __ Dsrl32(dst, lhs, shift_value); + } + } + } + } else { + if (type == Primitive::kPrimInt) { + if (instr->IsShl()) { + __ Sllv(dst, lhs, rhs_reg); + } else if (instr->IsShr()) { + __ Srav(dst, lhs, rhs_reg); + } else { + __ Srlv(dst, lhs, rhs_reg); + } + } else { + if (instr->IsShl()) { + __ Dsllv(dst, lhs, rhs_reg); + } else if (instr->IsShr()) { + __ Dsrav(dst, lhs, rhs_reg); + } else { + __ Dsrlv(dst, lhs, rhs_reg); + } + } + } + break; + } + default: + LOG(FATAL) << "Unexpected shift operation type " << type; + } +} + +void LocationsBuilderMIPS64::VisitAdd(HAdd* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitAdd(HAdd* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderMIPS64::VisitAnd(HAnd* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitAnd(HAnd* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (Primitive::IsFloatingPointType(instruction->GetType())) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } else { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } +} + +void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); + Location index = locations->InAt(1); + Primitive::Type type = instruction->GetType(); + + switch (type) { + case Primitive::kPrimBoolean: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; + __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset); + } else { + __ Daddu(TMP, obj, index.AsRegister<GpuRegister>()); + __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset); + } + break; + } + + case Primitive::kPrimByte: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; + __ LoadFromOffset(kLoadSignedByte, out, obj, offset); + } else { + __ Daddu(TMP, obj, index.AsRegister<GpuRegister>()); + __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset); + } + break; + } + + case Primitive::kPrimShort: { 
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; + __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset); + } else { + __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2); + __ Daddu(TMP, obj, TMP); + __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset); + } + break; + } + + case Primitive::kPrimChar: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; + __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset); + } else { + __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2); + __ Daddu(TMP, obj, TMP); + __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset); + } + break; + } + + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + LoadOperandType load_type = (type == Primitive::kPrimNot) ? kLoadUnsignedWord : kLoadWord; + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ LoadFromOffset(load_type, out, obj, offset); + } else { + __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4); + __ Daddu(TMP, obj, TMP); + __ LoadFromOffset(load_type, out, TMP, data_offset); + } + break; + } + + case Primitive::kPrimLong: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ LoadFromOffset(kLoadDoubleword, out, obj, offset); + } else { + __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8); + __ Daddu(TMP, obj, TMP); + __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset); + } + break; + } + + case Primitive::kPrimFloat: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ LoadFpuFromOffset(kLoadWord, out, obj, offset); + } else { + __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4); + __ Daddu(TMP, obj, TMP); + __ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset); + } + break; + } + + case Primitive::kPrimDouble: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset); + } else { + __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8); + __ Daddu(TMP, obj, TMP); + __ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset); + } + break; + } + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << instruction->GetType(); + UNREACHABLE(); + } + 
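// Every case above forms the element address as
//   obj + data_offset + (index << log2(component_size)),
// folding the scaled index into the immediate offset when the index is a
// constant. References are loaded with kLoadUnsignedWord because heap
// references are 32-bit (see the DCHECK in the kPrimNot case) and must be
// zero-extended into a 64-bit GPR.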
codegen_->MaybeRecordImplicitNullCheck(instruction); +} + +void LocationsBuilderMIPS64::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorMIPS64::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = instruction->GetLocations(); + uint32_t offset = mirror::Array::LengthOffset().Uint32Value(); + GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + __ LoadFromOffset(kLoadWord, out, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); +} + +void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) { + Primitive::Type value_type = instruction->GetComponentType(); + bool is_object = value_type == Primitive::kPrimNot; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + instruction, + is_object ? LocationSummary::kCall : LocationSummary::kNoCall); + if (is_object) { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RequiresRegister()); + } + } +} + +void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); + Location index = locations->InAt(1); + Primitive::Type value_type = instruction->GetComponentType(); + bool needs_runtime_call = locations->WillCall(); + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + + switch (value_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); + GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; + __ StoreToOffset(kStoreByte, value, obj, offset); + } else { + __ Daddu(TMP, obj, index.AsRegister<GpuRegister>()); + __ StoreToOffset(kStoreByte, value, TMP, data_offset); + } + break; + } + + case Primitive::kPrimShort: + case Primitive::kPrimChar: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); + GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; + __ StoreToOffset(kStoreHalfword, value, obj, offset); + } else { + __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2); + __ Daddu(TMP, obj, TMP); + __ StoreToOffset(kStoreHalfword, value, TMP, data_offset); + } + break; + } + + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + if (!needs_runtime_call) { + uint32_t 
data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ StoreToOffset(kStoreWord, value, obj, offset); + } else { + DCHECK(index.IsRegister()) << index; + __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4); + __ Daddu(TMP, obj, TMP); + __ StoreToOffset(kStoreWord, value, TMP, data_offset); + } + codegen_->MaybeRecordImplicitNullCheck(instruction); + if (needs_write_barrier) { + DCHECK_EQ(value_type, Primitive::kPrimNot); + codegen_->MarkGCCard(obj, value); + } + } else { + DCHECK_EQ(value_type, Primitive::kPrimNot); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), + instruction, + instruction->GetDexPc(), + nullptr); + } + break; + } + + case Primitive::kPrimLong: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); + GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ StoreToOffset(kStoreDoubleword, value, obj, offset); + } else { + __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8); + __ Daddu(TMP, obj, TMP); + __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset); + } + break; + } + + case Primitive::kPrimFloat: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + FpuRegister value = locations->InAt(2).AsFpuRegister<FpuRegister>(); + DCHECK(locations->InAt(2).IsFpuRegister()); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ StoreFpuToOffset(kStoreWord, value, obj, offset); + } else { + __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4); + __ Daddu(TMP, obj, TMP); + __ StoreFpuToOffset(kStoreWord, value, TMP, data_offset); + } + break; + } + + case Primitive::kPrimDouble: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + FpuRegister value = locations->InAt(2).AsFpuRegister<FpuRegister>(); + DCHECK(locations->InAt(2).IsFpuRegister()); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ StoreFpuToOffset(kStoreDoubleword, value, obj, offset); + } else { + __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8); + __ Daddu(TMP, obj, TMP); + __ StoreFpuToOffset(kStoreDoubleword, value, TMP, data_offset); + } + break; + } + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << instruction->GetType(); + UNREACHABLE(); + } + + // Ints and objects are handled in the switch. 
+ if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } +} + +void LocationsBuilderMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + if (instruction->HasUses()) { + locations->SetOut(Location::SameAsFirstInput()); + } +} + +void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) { + LocationSummary* locations = instruction->GetLocations(); + BoundsCheckSlowPathMIPS64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathMIPS64( + instruction, + locations->InAt(0), + locations->InAt(1)); + codegen_->AddSlowPath(slow_path); + + GpuRegister index = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister length = locations->InAt(1).AsRegister<GpuRegister>(); + + // length is limited by the maximum positive signed 32-bit integer. + // Unsigned comparison of length and index checks for index < 0 + // and for length <= index simultaneously. + // Mips R6 requires lhs != rhs for compact branches. + if (index == length) { + __ B(slow_path->GetEntryLabel()); + } else { + __ Bgeuc(index, length, slow_path->GetEntryLabel()); + } +} + +void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + instruction, + LocationSummary::kCallOnSlowPath); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { + LocationSummary* locations = instruction->GetLocations(); + GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister obj_cls = locations->GetTemp(0).AsRegister<GpuRegister>(); + + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64( + instruction, + locations->InAt(1), + Location::RegisterLocation(obj_cls), + instruction->GetDexPc()); + codegen_->AddSlowPath(slow_path); + + // TODO: avoid this check if we know obj is not null. + __ Beqzc(obj, slow_path->GetExitLabel()); + // Compare the class of `obj` with `cls`. + __ LoadFromOffset(kLoadUnsignedWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value()); + __ Bnec(obj_cls, cls, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void LocationsBuilderMIPS64::VisitClinitCheck(HClinitCheck* check) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath); + locations->SetInAt(0, Location::RequiresRegister()); + if (check->HasUses()) { + locations->SetOut(Location::SameAsFirstInput()); + } +} + +void InstructionCodeGeneratorMIPS64::VisitClinitCheck(HClinitCheck* check) { + // We assume the class is not null. 
+ SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( + check->GetLoadClass(), + check, + check->GetDexPc(), + true); + codegen_->AddSlowPath(slow_path); + GenerateClassInitializationCheck(slow_path, + check->GetLocations()->InAt(0).AsRegister<GpuRegister>()); +} + +void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { + Primitive::Type in_type = compare->InputAt(0)->GetType(); + + LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type) + ? LocationSummary::kCall + : LocationSummary::kNoCall; + + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind); + + switch (in_type) { + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + break; + } + + default: + LOG(FATAL) << "Unexpected type for compare operation " << in_type; + } +} + +void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Primitive::Type in_type = instruction->InputAt(0)->GetType(); + + // 0 if: left == right + // 1 if: left > right + // -1 if: left < right + switch (in_type) { + case Primitive::kPrimLong: { + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); + // TODO: more efficient (direct) comparison with a constant + __ Slt(TMP, lhs, rhs); + __ Slt(dst, rhs, lhs); + __ Subu(dst, dst, TMP); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + int32_t entry_point_offset; + if (in_type == Primitive::kPrimFloat) { + entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgFloat) + : QUICK_ENTRY_POINT(pCmplFloat); + } else { + entry_point_offset = instruction->IsGtBias() ? 
QUICK_ENTRY_POINT(pCmpgDouble) + : QUICK_ENTRY_POINT(pCmplDouble); + } + codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr); + break; + } + + default: + LOG(FATAL) << "Unimplemented compare type " << in_type; + } +} + +void LocationsBuilderMIPS64::VisitCondition(HCondition* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (instruction->NeedsMaterialization()) { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } +} + +void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) { + if (!instruction->NeedsMaterialization()) { + return; + } + + LocationSummary* locations = instruction->GetLocations(); + + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + Location rhs_location = locations->InAt(1); + + GpuRegister rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } else { + rhs_reg = rhs_location.AsRegister<GpuRegister>(); + } + + IfCondition if_cond = instruction->GetCondition(); + + switch (if_cond) { + case kCondEQ: + case kCondNE: + if (use_imm && IsUint<16>(rhs_imm)) { + __ Xori(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + __ Xor(dst, lhs, rhs_reg); + } + if (if_cond == kCondEQ) { + __ Sltiu(dst, dst, 1); + } else { + __ Sltu(dst, ZERO, dst); + } + break; + + case kCondLT: + case kCondGE: + if (use_imm && IsInt<16>(rhs_imm)) { + __ Slti(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + __ Slt(dst, lhs, rhs_reg); + } + if (if_cond == kCondGE) { + // Simulate lhs >= rhs via !(lhs < rhs) since there's + // only the slt instruction but no sge. + __ Xori(dst, dst, 1); + } + break; + + case kCondLE: + case kCondGT: + if (use_imm && IsInt<16>(rhs_imm + 1)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + __ Slti(dst, lhs, rhs_imm + 1); + if (if_cond == kCondGT) { + // Simulate lhs > rhs via !(lhs <= rhs) since there's + // only the slti instruction but no sgti. + __ Xori(dst, dst, 1); + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + __ Slt(dst, rhs_reg, lhs); + if (if_cond == kCondLE) { + // Simulate lhs <= rhs via !(rhs < lhs) since there's + // only the slt instruction but no sle. 
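+          // Xori with immediate 1 flips the 0/1 value produced by slt,
+          // giving the logical negation.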
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+  }
+}
+
+void LocationsBuilderMIPS64::VisitDiv(HDiv* div) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
+  switch (div->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected div type " << div->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitDiv(HDiv* instruction) {
+  Primitive::Type type = instruction->GetType();
+  LocationSummary* locations = instruction->GetLocations();
+
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
+      if (type == Primitive::kPrimInt)
+        __ DivR6(dst, lhs, rhs);
+      else
+        __ Ddiv(dst, lhs, rhs);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+      FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+      FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+      if (type == Primitive::kPrimFloat)
+        __ DivS(dst, lhs, rhs);
+      else
+        __ DivD(dst, lhs, rhs);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected div type " << type;
+  }
+}
+
+void LocationsBuilderMIPS64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+  SlowPathCodeMIPS64* slow_path =
+      new (GetGraph()->GetArena()) DivZeroCheckSlowPathMIPS64(instruction);
+  codegen_->AddSlowPath(slow_path);
+  Location value = instruction->GetLocations()->InAt(0);
+
+  Primitive::Type type = instruction->GetType();
+
+  if ((type != Primitive::kPrimInt) && (type != Primitive::kPrimLong)) {
+    LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
+  }
+
+  if (value.IsConstant()) {
+    int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant());
+    if (divisor == 0) {
+      __ B(slow_path->GetEntryLabel());
+    } else {
+      // A division by a non-zero constant is valid. We don't need to perform
+      // any check, so simply fall through.
+    }
+  } else {
+    __ Beqzc(value.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+  }
+}
+
+void LocationsBuilderMIPS64::VisitDoubleConstant(HDoubleConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitDoubleConstant(HDoubleConstant* cst ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+} + +void LocationsBuilderMIPS64::VisitExit(HExit* exit) { + exit->SetLocations(nullptr); +} + +void InstructionCodeGeneratorMIPS64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { +} + +void LocationsBuilderMIPS64::VisitFloatConstant(HFloatConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorMIPS64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { + // Will be generated at use site. +} + +void LocationsBuilderMIPS64::VisitGoto(HGoto* got) { + got->SetLocations(nullptr); +} + +void InstructionCodeGeneratorMIPS64::VisitGoto(HGoto* got) { + HBasicBlock* successor = got->GetSuccessor(); + DCHECK(!successor->IsExitBlock()); + HBasicBlock* block = got->GetBlock(); + HInstruction* previous = got->GetPrevious(); + HLoopInformation* info = block->GetLoopInformation(); + + if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); + GenerateSuspendCheck(info->GetSuspendCheck(), successor); + return; + } + if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { + GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + } + if (!codegen_->GoesToNextBlock(block, successor)) { + __ B(codegen_->GetLabelOf(successor)); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); + HCondition* condition = cond->AsCondition(); + + if (cond->IsIntConstant()) { + int32_t cond_value = cond->AsIntConstant()->GetValue(); + if (cond_value == 1) { + if (always_true_target != nullptr) { + __ B(always_true_target); + } + return; + } else { + DCHECK_EQ(cond_value, 0); + } + } else if (!cond->IsCondition() || condition->NeedsMaterialization()) { + // The condition instruction has been materialized, compare the output to 0. + Location cond_val = instruction->GetLocations()->InAt(0); + DCHECK(cond_val.IsRegister()); + __ Bnezc(cond_val.AsRegister<GpuRegister>(), true_target); + } else { + // The condition instruction has not been materialized, use its inputs as + // the comparison and its condition as the branch condition. + GpuRegister lhs = condition->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + Location rhs_location = condition->GetLocations()->InAt(1); + GpuRegister rhs_reg = ZERO; + int32_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } else { + rhs_reg = rhs_location.AsRegister<GpuRegister>(); + } + + IfCondition if_cond = condition->GetCondition(); + if (use_imm && rhs_imm == 0) { + switch (if_cond) { + case kCondEQ: + __ Beqzc(lhs, true_target); + break; + case kCondNE: + __ Bnezc(lhs, true_target); + break; + case kCondLT: + __ Bltzc(lhs, true_target); + break; + case kCondGE: + __ Bgezc(lhs, true_target); + break; + case kCondLE: + __ Blezc(lhs, true_target); + break; + case kCondGT: + __ Bgtzc(lhs, true_target); + break; + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + // It looks like we can get here with lhs == rhs. Should that be possible at all? + // Mips R6 requires lhs != rhs for compact branches. 
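+      // With equal registers the outcome is known statically: EQ, GE and LE
+      // are always true (branch unconditionally), while NE, LT and GT are
+      // always false (fall through).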
+ if (lhs == rhs_reg) { + DCHECK(!use_imm); + switch (if_cond) { + case kCondEQ: + case kCondGE: + case kCondLE: + // if lhs == rhs for a positive condition, then it is a branch + __ B(true_target); + break; + case kCondNE: + case kCondLT: + case kCondGT: + // if lhs == rhs for a negative condition, then it is a NOP + break; + } + } else { + switch (if_cond) { + case kCondEQ: + __ Beqc(lhs, rhs_reg, true_target); + break; + case kCondNE: + __ Bnec(lhs, rhs_reg, true_target); + break; + case kCondLT: + __ Bltc(lhs, rhs_reg, true_target); + break; + case kCondGE: + __ Bgec(lhs, rhs_reg, true_target); + break; + case kCondLE: + __ Bgec(rhs_reg, lhs, true_target); + break; + case kCondGT: + __ Bltc(rhs_reg, lhs, true_target); + break; + } + } + } + } + if (false_target != nullptr) { + __ B(false_target); + } +} + +void LocationsBuilderMIPS64::VisitIf(HIf* if_instr) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { + Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; + } + GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); +} + +void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathMIPS64(deoptimize); + codegen_->AddSlowPath(slow_path); + Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); +} + +void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info ATTRIBUTE_UNUSED) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + if (Primitive::IsFloatingPointType(instruction->GetType())) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } +} + +void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + Primitive::Type type = field_info.GetFieldType(); + LocationSummary* locations = instruction->GetLocations(); + GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); + LoadOperandType load_type = kLoadUnsignedByte; + switch (type) { + case Primitive::kPrimBoolean: + load_type = kLoadUnsignedByte; + break; + case Primitive::kPrimByte: + load_type = kLoadSignedByte; + break; + case Primitive::kPrimShort: + load_type = 
kLoadSignedHalfword; + break; + case Primitive::kPrimChar: + load_type = kLoadUnsignedHalfword; + break; + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + load_type = kLoadWord; + break; + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + load_type = kLoadDoubleword; + break; + case Primitive::kPrimNot: + load_type = kLoadUnsignedWord; + break; + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } + if (!Primitive::IsFloatingPointType(type)) { + DCHECK(locations->Out().IsRegister()); + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value()); + } else { + DCHECK(locations->Out().IsFpuRegister()); + FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); + __ LoadFpuFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value()); + } + + codegen_->MaybeRecordImplicitNullCheck(instruction); + // TODO: memory barrier? +} + +void LocationsBuilderMIPS64::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info ATTRIBUTE_UNUSED) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { + locations->SetInAt(1, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + Primitive::Type type = field_info.GetFieldType(); + LocationSummary* locations = instruction->GetLocations(); + GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); + StoreOperandType store_type = kStoreByte; + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + store_type = kStoreByte; + break; + case Primitive::kPrimShort: + case Primitive::kPrimChar: + store_type = kStoreHalfword; + break; + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimNot: + store_type = kStoreWord; + break; + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + store_type = kStoreDoubleword; + break; + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } + if (!Primitive::IsFloatingPointType(type)) { + DCHECK(locations->InAt(1).IsRegister()); + GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>(); + __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value()); + } else { + DCHECK(locations->InAt(1).IsFpuRegister()); + FpuRegister src = locations->InAt(1).AsFpuRegister<FpuRegister>(); + __ StoreFpuToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value()); + } + + codegen_->MaybeRecordImplicitNullCheck(instruction); + // TODO: memory barriers? 
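+  // Stores of object references also need a card mark so that the garbage
+  // collector can find the updated reference later.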
+ if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) { + DCHECK(locations->InAt(1).IsRegister()); + GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>(); + codegen_->MarkGCCard(obj, src); + } +} + +void LocationsBuilderMIPS64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorMIPS64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderMIPS64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorMIPS64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { + LocationSummary::CallKind call_kind = + instruction->IsClassFinal() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // The output does overlap inputs. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { + LocationSummary* locations = instruction->GetLocations(); + GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + Label done; + + // Return 0 if `obj` is null. + // TODO: Avoid this check if we know `obj` is not null. + __ Move(out, ZERO); + __ Beqzc(obj, &done); + + // Compare the class of `obj` with `cls`. + __ LoadFromOffset(kLoadUnsignedWord, out, obj, mirror::Object::ClassOffset().Int32Value()); + if (instruction->IsClassFinal()) { + // Classes must be equal for the instanceof to succeed. + __ Xor(out, out, cls); + __ Sltiu(out, out, 1); + } else { + // If the classes are not equal, we go into a slow path. + DCHECK(locations->OnlyCallsOnSlowPath()); + SlowPathCodeMIPS64* slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction, + locations->InAt(1), + locations->Out(), + instruction->GetDexPc()); + codegen_->AddSlowPath(slow_path); + __ Bnec(out, cls, slow_path->GetEntryLabel()); + __ LoadConst32(out, 1); + __ Bind(slow_path->GetExitLabel()); + } + + __ Bind(&done); +} + +void LocationsBuilderMIPS64::VisitIntConstant(HIntConstant* constant) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorMIPS64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { + // Will be generated at use site. +} + +void LocationsBuilderMIPS64::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorMIPS64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { + // Will be generated at use site. 
+} + +void LocationsBuilderMIPS64::HandleInvoke(HInvoke* invoke) { + InvokeDexCallingConventionVisitorMIPS64 calling_convention_visitor; + CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); +} + +void LocationsBuilderMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) { + HandleInvoke(invoke); + // The register T0 is required to be used for the hidden argument in + // art_quick_imt_conflict_trampoline, so add the hidden argument. + invoke->GetLocations()->AddTemp(Location::RegisterLocation(T0)); +} + +void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) { + // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. + GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>(); + uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( + invoke->GetImtIndex() % mirror::Class::kImtSize, kMips64PointerSize).Uint32Value(); + Location receiver = invoke->GetLocations()->InAt(0); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize); + + // Set the hidden argument. + __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<GpuRegister>(), + invoke->GetDexMethodIndex()); + + // temp = object->GetClass(); + if (receiver.IsStackSlot()) { + __ LoadFromOffset(kLoadUnsignedWord, temp, SP, receiver.GetStackIndex()); + __ LoadFromOffset(kLoadUnsignedWord, temp, temp, class_offset); + } else { + __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset); + } + codegen_->MaybeRecordImplicitNullCheck(invoke); + // temp = temp->GetImtEntryAt(method_offset); + __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset); + // T9 = temp->GetEntryPoint(); + __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); + // T9(); + __ Jalr(T9); + DCHECK(!codegen_->IsLeafMethod()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void LocationsBuilderMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { + // TODO intrinsic function + HandleInvoke(invoke); +} + +void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + + // TODO - intrinsic function + HandleInvoke(invoke); + + // While SetupBlockedRegisters() blocks registers S2-S8 due to their + // clobbering somewhere else, reduce further register pressure by avoiding + // allocation of a register for the current method pointer like on x86 baseline. + // TODO: remove this once all the issues with register saving/restoring are + // sorted out. 
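+  // If the current-method input is still unallocated and only requires "some
+  // register", drop the requirement so that no register gets reserved for it.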
+ LocationSummary* locations = invoke->GetLocations(); + Location location = locations->InAt(invoke->GetCurrentMethodInputIndex()); + if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { + locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation()); + } +} + +static bool TryGenerateIntrinsicCode(HInvoke* invoke, + CodeGeneratorMIPS64* codegen ATTRIBUTE_UNUSED) { + if (invoke->GetLocations()->Intrinsified()) { + // TODO - intrinsic function + return true; + } + return false; +} + +void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { + // All registers are assumed to be correctly set up per the calling convention. + + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. + + if (invoke->IsStringInit()) { + GpuRegister reg = temp.AsRegister<GpuRegister>(); + // temp = thread->string_init_entrypoint + __ LoadFromOffset(kLoadDoubleword, + reg, + TR, + invoke->GetStringInitOffset()); + // T9 = temp->entry_point_from_quick_compiled_code_; + __ LoadFromOffset(kLoadDoubleword, + T9, + reg, + ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kMips64WordSize).Int32Value()); + // T9() + __ Jalr(T9); + } else if (invoke->IsRecursive()) { + __ Jalr(&frame_entry_label_, T9); + } else { + Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + GpuRegister reg = temp.AsRegister<GpuRegister>(); + GpuRegister method_reg; + if (current_method.IsRegister()) { + method_reg = current_method.AsRegister<GpuRegister>(); + } else { + // TODO: use the appropriate DCHECK() here if possible. + // DCHECK(invoke->GetLocations()->Intrinsified()); + DCHECK(!current_method.IsValid()); + method_reg = reg; + __ Ld(reg, SP, kCurrentMethodStackOffset); + } + + // temp = temp->dex_cache_resolved_methods_; + __ LoadFromOffset(kLoadUnsignedWord, + reg, + method_reg, + ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); + // temp = temp[index_in_cache] + __ LoadFromOffset(kLoadDoubleword, + reg, + reg, + CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex())); + // T9 = temp[offset_of_quick_compiled_code] + __ LoadFromOffset(kLoadDoubleword, + T9, + reg, + ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kMips64WordSize).Int32Value()); + // T9() + __ Jalr(T9); + } + + DCHECK(!IsLeafMethod()); +} + +void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } + + LocationSummary* locations = invoke->GetLocations(); + codegen_->GenerateStaticOrDirectCall(invoke, + locations->HasTemps() + ? locations->GetTemp(0) + : Location::NoLocation()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { + // TODO: Try to generate intrinsics code. 
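+  // Virtual dispatch: load the receiver's class, fetch the ArtMethod* from the
+  // class' embedded vtable at this method's vtable index, then call its quick
+  // compiled-code entry point through T9.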
+ LocationSummary* locations = invoke->GetLocations(); + Location receiver = locations->InAt(0); + GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>(); + size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( + invoke->GetVTableIndex(), kMips64PointerSize).SizeValue(); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize); + + // temp = object->GetClass(); + DCHECK(receiver.IsRegister()); + __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset); + codegen_->MaybeRecordImplicitNullCheck(invoke); + // temp = temp->GetMethodAt(method_offset); + __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset); + // T9 = temp->GetEntryPoint(); + __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); + // T9(); + __ Jalr(T9); + DCHECK(!codegen_->IsLeafMethod()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { + LocationSummary::CallKind call_kind = cls->CanCallRuntime() ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { + LocationSummary* locations = cls->GetLocations(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>(); + if (cls->IsReferrersClass()) { + DCHECK(!cls->CanCallRuntime()); + DCHECK(!cls->MustGenerateClinitCheck()); + __ LoadFromOffset(kLoadUnsignedWord, out, current_method, + ArtMethod::DeclaringClassOffset().Int32Value()); + } else { + DCHECK(cls->CanCallRuntime()); + __ LoadFromOffset(kLoadUnsignedWord, out, current_method, + ArtMethod::DexCacheResolvedTypesOffset().Int32Value()); + __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( + cls, + cls, + cls->GetDexPc(), + cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + __ Beqzc(out, slow_path->GetEntryLabel()); + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } + } +} + +void LocationsBuilderMIPS64::VisitLoadException(HLoadException* load) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorMIPS64::VisitLoadException(HLoadException* load) { + GpuRegister out = load->GetLocations()->Out().AsRegister<GpuRegister>(); + __ LoadFromOffset(kLoadUnsignedWord, out, TR, Thread::ExceptionOffset<kMips64WordSize>().Int32Value()); + __ StoreToOffset(kStoreWord, ZERO, TR, Thread::ExceptionOffset<kMips64WordSize>().Int32Value()); +} + +void LocationsBuilderMIPS64::VisitLoadLocal(HLoadLocal* load) { + load->SetLocations(nullptr); +} + +void InstructionCodeGeneratorMIPS64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) { + // Nothing to do, this is driven by the code generator. 
+} + +void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) { + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); + codegen_->AddSlowPath(slow_path); + + LocationSummary* locations = load->GetLocations(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>(); + __ LoadFromOffset(kLoadUnsignedWord, out, current_method, + ArtMethod::DeclaringClassOffset().Int32Value()); + __ LoadFromOffset(kLoadUnsignedWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); + __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); + __ Beqzc(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void LocationsBuilderMIPS64::VisitLocal(HLocal* local) { + local->SetLocations(nullptr); +} + +void InstructionCodeGeneratorMIPS64::VisitLocal(HLocal* local) { + DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock()); +} + +void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorMIPS64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { + // Will be generated at use site. +} + +void LocationsBuilderMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void InstructionCodeGeneratorMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) { + codegen_->InvokeRuntime(instruction->IsEnter() + ? 
QUICK_ENTRY_POINT(pLockObject) + : QUICK_ENTRY_POINT(pUnlockObject), + instruction, + instruction->GetDexPc(), + nullptr); + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); +} + +void LocationsBuilderMIPS64::VisitMul(HMul* mul) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall); + switch (mul->GetResultType()) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::VisitMul(HMul* instruction) { + Primitive::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); + + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); + if (type == Primitive::kPrimInt) + __ MulR6(dst, lhs, rhs); + else + __ Dmul(dst, lhs, rhs); + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + if (type == Primitive::kPrimFloat) + __ MulS(dst, lhs, rhs); + else + __ MulD(dst, lhs, rhs); + break; + } + default: + LOG(FATAL) << "Unexpected mul type " << type; + } +} + +void LocationsBuilderMIPS64::VisitNeg(HNeg* neg) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); + switch (neg->GetResultType()) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::VisitNeg(HNeg* instruction) { + Primitive::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); + + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); + if (type == Primitive::kPrimInt) + __ Subu(dst, ZERO, src); + else + __ Dsubu(dst, ZERO, src); + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); + FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>(); + if (type == Primitive::kPrimFloat) + __ NegS(dst, src); + else + __ NegD(dst, src); + break; + } + default: + LOG(FATAL) << "Unexpected neg type " << type; 
+  }
+}
+
+void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  // Move a uint16_t value to a register.
+  __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex());
+  codegen_->InvokeRuntime(
+      GetThreadOffset<kMips64WordSize>(instruction->GetEntrypoint()).Int32Value(),
+      instruction,
+      instruction->GetDexPc(),
+      nullptr);
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
+}
+
+void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  // Move a uint16_t value to a register.
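+  // The type index fits in 16 bits, so a 32-bit constant load is sufficient.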
+ __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex()); + codegen_->InvokeRuntime( + GetThreadOffset<kMips64WordSize>(instruction->GetEntrypoint()).Int32Value(), + instruction, + instruction->GetDexPc(), + nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); +} + +void LocationsBuilderMIPS64::VisitNot(HNot* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorMIPS64::VisitNot(HNot* instruction) { + Primitive::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); + + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); + __ Nor(dst, src, ZERO); + break; + } + + default: + LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType(); + } +} + +void LocationsBuilderMIPS64::VisitBooleanNot(HBooleanNot* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorMIPS64::VisitBooleanNot(HBooleanNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + __ Xori(locations->Out().AsRegister<GpuRegister>(), + locations->InAt(0).AsRegister<GpuRegister>(), + 1); +} + +void LocationsBuilderMIPS64::VisitNullCheck(HNullCheck* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + if (instruction->HasUses()) { + locations->SetOut(Location::SameAsFirstInput()); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateImplicitNullCheck(HNullCheck* instruction) { + if (codegen_->CanMoveNullCheckToUser(instruction)) { + return; + } + Location obj = instruction->GetLocations()->InAt(0); + + __ Lw(ZERO, obj.AsRegister<GpuRegister>(), 0); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void InstructionCodeGeneratorMIPS64::GenerateExplicitNullCheck(HNullCheck* instruction) { + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathMIPS64(instruction); + codegen_->AddSlowPath(slow_path); + + Location obj = instruction->GetLocations()->InAt(0); + + __ Beqzc(obj.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); +} + +void InstructionCodeGeneratorMIPS64::VisitNullCheck(HNullCheck* instruction) { + if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) { + GenerateImplicitNullCheck(instruction); + } else { + GenerateExplicitNullCheck(instruction); + } +} + +void LocationsBuilderMIPS64::VisitOr(HOr* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitOr(HOr* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderMIPS64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorMIPS64::VisitParallelMove(HParallelMove* instruction) { + codegen_->GetMoveResolver()->EmitNativeCode(instruction); +} + +void 
LocationsBuilderMIPS64::VisitParameterValue(HParameterValue* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); + if (location.IsStackSlot()) { + location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } else if (location.IsDoubleStackSlot()) { + location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } + locations->SetOut(location); +} + +void InstructionCodeGeneratorMIPS64::VisitParameterValue(HParameterValue* instruction + ATTRIBUTE_UNUSED) { + // Nothing to do, the parameter is already at its location. +} + +void LocationsBuilderMIPS64::VisitCurrentMethod(HCurrentMethod* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument)); +} + +void InstructionCodeGeneratorMIPS64::VisitCurrentMethod(HCurrentMethod* instruction + ATTRIBUTE_UNUSED) { + // Nothing to do, the method is already at its location. +} + +void LocationsBuilderMIPS64::VisitPhi(HPhi* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { + locations->SetInAt(i, Location::Any()); + } + locations->SetOut(Location::Any()); +} + +void InstructionCodeGeneratorMIPS64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + +void LocationsBuilderMIPS64::VisitRem(HRem* rem) { + Primitive::Type type = rem->GetResultType(); + LocationSummary::CallKind call_kind = + Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(calling_convention.GetReturnLocation(type)); + break; + } + + default: + LOG(FATAL) << "Unexpected rem type " << type; + } +} + +void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { + Primitive::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); + + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); + if (type == Primitive::kPrimInt) + __ ModR6(dst, lhs, rhs); + else + __ Dmod(dst, lhs, rhs); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + int32_t entry_offset = (type == Primitive::kPrimFloat) ? 
QUICK_ENTRY_POINT(pFmodf) + : QUICK_ENTRY_POINT(pFmod); + codegen_->InvokeRuntime(entry_offset, instruction, instruction->GetDexPc(), nullptr); + break; + } + default: + LOG(FATAL) << "Unexpected rem type " << type; + } +} + +void LocationsBuilderMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { + memory_barrier->SetLocations(nullptr); +} + +void InstructionCodeGeneratorMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { + GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); +} + +void LocationsBuilderMIPS64::VisitReturn(HReturn* ret) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(ret); + Primitive::Type return_type = ret->InputAt(0)->GetType(); + locations->SetInAt(0, Mips64ReturnLocation(return_type)); +} + +void InstructionCodeGeneratorMIPS64::VisitReturn(HReturn* ret ATTRIBUTE_UNUSED) { + codegen_->GenerateFrameExit(); +} + +void LocationsBuilderMIPS64::VisitReturnVoid(HReturnVoid* ret) { + ret->SetLocations(nullptr); +} + +void InstructionCodeGeneratorMIPS64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { + codegen_->GenerateFrameExit(); +} + +void LocationsBuilderMIPS64::VisitShl(HShl* shl) { + HandleShift(shl); +} + +void InstructionCodeGeneratorMIPS64::VisitShl(HShl* shl) { + HandleShift(shl); +} + +void LocationsBuilderMIPS64::VisitShr(HShr* shr) { + HandleShift(shr); +} + +void InstructionCodeGeneratorMIPS64::VisitShr(HShr* shr) { + HandleShift(shr); +} + +void LocationsBuilderMIPS64::VisitStoreLocal(HStoreLocal* store) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store); + Primitive::Type field_type = store->InputAt(1)->GetType(); + switch (field_type) { + case Primitive::kPrimNot: + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal()))); + break; + + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal()))); + break; + + default: + LOG(FATAL) << "Unimplemented local type " << field_type; + } +} + +void InstructionCodeGeneratorMIPS64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) { +} + +void LocationsBuilderMIPS64::VisitSub(HSub* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitSub(HSub* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderMIPS64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorMIPS64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderMIPS64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorMIPS64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) { + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); +} + +void InstructionCodeGeneratorMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) { + HBasicBlock* block = instruction->GetBlock(); + if (block->GetLoopInformation() != nullptr) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() 
== instruction); + // The back edge will generate the suspend check. + return; + } + if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { + // The goto will generate the suspend check. + return; + } + GenerateSuspendCheck(instruction, nullptr); +} + +void LocationsBuilderMIPS64::VisitTemporary(HTemporary* temp) { + temp->SetLocations(nullptr); +} + +void InstructionCodeGeneratorMIPS64::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) { + // Nothing to do, this is driven by the code generator. +} + +void LocationsBuilderMIPS64::VisitThrow(HThrow* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void InstructionCodeGeneratorMIPS64::VisitThrow(HThrow* instruction) { + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException), + instruction, + instruction->GetDexPc(), + nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); +} + +void LocationsBuilderMIPS64::VisitTypeConversion(HTypeConversion* conversion) { + Primitive::Type input_type = conversion->GetInputType(); + Primitive::Type result_type = conversion->GetResultType(); + DCHECK_NE(input_type, result_type); + + if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) || + (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) { + LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; + } + + LocationSummary::CallKind call_kind = LocationSummary::kNoCall; + if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || + (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) { + call_kind = LocationSummary::kCall; + } + + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind); + + if (call_kind == LocationSummary::kNoCall) { + if (Primitive::IsFloatingPointType(input_type)) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + } + + if (Primitive::IsFloatingPointType(result_type)) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } else { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } + } else { + InvokeRuntimeCallingConvention calling_convention; + + if (Primitive::IsFloatingPointType(input_type)) { + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + } else { + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + } + + locations->SetOut(calling_convention.GetReturnLocation(result_type)); + } +} + +void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conversion) { + LocationSummary* locations = conversion->GetLocations(); + Primitive::Type result_type = conversion->GetResultType(); + Primitive::Type input_type = conversion->GetInputType(); + + DCHECK_NE(input_type, result_type); + + if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); + + switch (result_type) { + case Primitive::kPrimChar: + __ Andi(dst, src, 0xFFFF); + break; + case Primitive::kPrimByte: + // long is never 
converted into types narrower than int directly, + // so SEB and SEH can be used without ever causing unpredictable results + // on 64-bit inputs + DCHECK(input_type != Primitive::kPrimLong); + __ Seb(dst, src); + break; + case Primitive::kPrimShort: + // long is never converted into types narrower than int directly, + // so SEB and SEH can be used without ever causing unpredictable results + // on 64-bit inputs + DCHECK(input_type != Primitive::kPrimLong); + __ Seh(dst, src); + break; + case Primitive::kPrimInt: + case Primitive::kPrimLong: + // Sign-extend 32-bit int into bits 32 through 63 for + // int-to-long and long-to-int conversions + __ Sll(dst, src, 0); + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) { + if (input_type != Primitive::kPrimLong) { + FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); + GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); + __ Mtc1(src, FTMP); + if (result_type == Primitive::kPrimFloat) { + __ Cvtsw(dst, FTMP); + } else { + __ Cvtdw(dst, FTMP); + } + } else { + int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f) + : QUICK_ENTRY_POINT(pL2d); + codegen_->InvokeRuntime(entry_offset, + conversion, + conversion->GetDexPc(), + nullptr); + } + } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { + CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); + int32_t entry_offset; + if (result_type != Primitive::kPrimLong) { + entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz) + : QUICK_ENTRY_POINT(pD2iz); + } else { + entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) + : QUICK_ENTRY_POINT(pD2l); + } + codegen_->InvokeRuntime(entry_offset, + conversion, + conversion->GetDexPc(), + nullptr); + } else if (Primitive::IsFloatingPointType(result_type) && + Primitive::IsFloatingPointType(input_type)) { + FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); + FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>(); + if (result_type == Primitive::kPrimFloat) { + __ Cvtsd(dst, src); + } else { + __ Cvtds(dst, src); + } + } else { + LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type + << " to " << result_type; + } +} + +void LocationsBuilderMIPS64::VisitUShr(HUShr* ushr) { + HandleShift(ushr); +} + +void InstructionCodeGeneratorMIPS64::VisitUShr(HUShr* ushr) { + HandleShift(ushr); +} + +void LocationsBuilderMIPS64::VisitXor(HXor* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitXor(HXor* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderMIPS64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { + // Nothing to do, this should be removed during prepare for register allocator. + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorMIPS64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { + // Nothing to do, this should be removed during prepare for register allocator. 
+ LOG(FATAL) << "Unreachable"; +} + +void LocationsBuilderMIPS64::VisitEqual(HEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorMIPS64::VisitEqual(HEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderMIPS64::VisitNotEqual(HNotEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorMIPS64::VisitNotEqual(HNotEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderMIPS64::VisitLessThan(HLessThan* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorMIPS64::VisitLessThan(HLessThan* comp) { + VisitCondition(comp); +} + +void LocationsBuilderMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderMIPS64::VisitGreaterThan(HGreaterThan* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorMIPS64::VisitGreaterThan(HGreaterThan* comp) { + VisitCondition(comp); +} + +void LocationsBuilderMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + VisitCondition(comp); +} + +} // namespace mips64 +} // namespace art diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h new file mode 100644 index 0000000000..ec36496a4f --- /dev/null +++ b/compiler/optimizing/code_generator_mips64.h @@ -0,0 +1,302 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_ +#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_ + +#include "code_generator.h" +#include "dex/compiler_enums.h" +#include "driver/compiler_options.h" +#include "nodes.h" +#include "parallel_move_resolver.h" +#include "utils/mips64/assembler_mips64.h" + +namespace art { +namespace mips64 { + +// Use a local definition to prevent copying mistakes. 
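+// MIPS64 general-purpose registers are 64 bits wide, so the code generator's
+// word size equals the pointer size.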
+static constexpr size_t kMips64WordSize = kMips64PointerSize; + + +// InvokeDexCallingConvention registers + +static constexpr GpuRegister kParameterCoreRegisters[] = + { A1, A2, A3, A4, A5, A6, A7 }; +static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); + +static constexpr FpuRegister kParameterFpuRegisters[] = + { F13, F14, F15, F16, F17, F18, F19 }; +static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters); + + +// InvokeRuntimeCallingConvention registers + +static constexpr GpuRegister kRuntimeParameterCoreRegisters[] = + { A0, A1, A2, A3, A4, A5, A6, A7 }; +static constexpr size_t kRuntimeParameterCoreRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); + +static constexpr FpuRegister kRuntimeParameterFpuRegisters[] = + { F12, F13, F14, F15, F16, F17, F18, F19 }; +static constexpr size_t kRuntimeParameterFpuRegistersLength = + arraysize(kRuntimeParameterFpuRegisters); + + +static constexpr GpuRegister kCoreCalleeSaves[] = + { S0, S1, S2, S3, S4, S5, S6, S7, GP, S8, RA }; // TODO: review +static constexpr FpuRegister kFpuCalleeSaves[] = + { F24, F25, F26, F27, F28, F29, F30, F31 }; + + +class CodeGeneratorMIPS64; + +class InvokeDexCallingConvention : public CallingConvention<GpuRegister, FpuRegister> { + public: + InvokeDexCallingConvention() + : CallingConvention(kParameterCoreRegisters, + kParameterCoreRegistersLength, + kParameterFpuRegisters, + kParameterFpuRegistersLength, + kMips64PointerSize) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); +}; + +class InvokeDexCallingConventionVisitorMIPS64 : public InvokeDexCallingConventionVisitor { + public: + InvokeDexCallingConventionVisitorMIPS64() {} + virtual ~InvokeDexCallingConventionVisitorMIPS64() {} + + Location GetNextLocation(Primitive::Type type) OVERRIDE; + Location GetReturnLocation(Primitive::Type type) const OVERRIDE; + Location GetMethodLocation() const OVERRIDE; + + private: + InvokeDexCallingConvention calling_convention; + + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorMIPS64); +}; + +class InvokeRuntimeCallingConvention : public CallingConvention<GpuRegister, FpuRegister> { + public: + InvokeRuntimeCallingConvention() + : CallingConvention(kRuntimeParameterCoreRegisters, + kRuntimeParameterCoreRegistersLength, + kRuntimeParameterFpuRegisters, + kRuntimeParameterFpuRegistersLength, + kMips64PointerSize) {} + + Location GetReturnLocation(Primitive::Type return_type); + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); +}; + +class ParallelMoveResolverMIPS64 : public ParallelMoveResolverWithSwap { + public: + ParallelMoveResolverMIPS64(ArenaAllocator* allocator, CodeGeneratorMIPS64* codegen) + : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} + + void EmitMove(size_t index) OVERRIDE; + void EmitSwap(size_t index) OVERRIDE; + void SpillScratch(int reg) OVERRIDE; + void RestoreScratch(int reg) OVERRIDE; + + void Exchange(int index1, int index2, bool double_slot); + + Mips64Assembler* GetAssembler() const; + + private: + CodeGeneratorMIPS64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverMIPS64); +}; + +class SlowPathCodeMIPS64 : public SlowPathCode { + public: + SlowPathCodeMIPS64() : entry_label_(), exit_label_() {} + + Label* GetEntryLabel() { return &entry_label_; } + Label* GetExitLabel() { return &exit_label_; } + + private: + Label entry_label_; + Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS64); +}; + +class LocationsBuilderMIPS64 
: public HGraphVisitor { + public: + LocationsBuilderMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen) + : HGraphVisitor(graph), codegen_(codegen) {} + +#define DECLARE_VISIT_INSTRUCTION(name, super) \ + void Visit##name(H##name* instr); + + FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + +#undef DECLARE_VISIT_INSTRUCTION + + private: + void HandleInvoke(HInvoke* invoke); + void HandleBinaryOp(HBinaryOperation* operation); + void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + InvokeDexCallingConventionVisitorMIPS64 parameter_visitor_; + + CodeGeneratorMIPS64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS64); +}; + +class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { + public: + InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen); + +#define DECLARE_VISIT_INSTRUCTION(name, super) \ + void Visit##name(H##name* instr); + + FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + +#undef DECLARE_VISIT_INSTRUCTION + + Mips64Assembler* GetAssembler() const { return assembler_; } + + private: + // Generate code for the given suspend check. If not null, `successor` + // is the block to branch to if the suspend check is not needed, and after + // the suspend call. + void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg); + void GenerateMemoryBarrier(MemBarrierKind kind); + void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); + void HandleBinaryOp(HBinaryOperation* operation); + void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateImplicitNullCheck(HNullCheck* instruction); + void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target); + + Mips64Assembler* const assembler_; + CodeGeneratorMIPS64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorMIPS64); +}; + +class CodeGeneratorMIPS64 : public CodeGenerator { + public: + CodeGeneratorMIPS64(HGraph* graph, + const Mips64InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options); + virtual ~CodeGeneratorMIPS64() {} + + void GenerateFrameEntry() OVERRIDE; + void GenerateFrameExit() OVERRIDE; + + void Bind(HBasicBlock* block) OVERRIDE; + + void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; + + size_t GetWordSize() const OVERRIDE { return kMips64WordSize; } + + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; } + + uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE { + return GetLabelOf(block)->Position(); + } + + HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } + HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } + Mips64Assembler* GetAssembler() OVERRIDE { return &assembler_; } + const Mips64Assembler& GetAssembler() const OVERRIDE { return assembler_; } + + void MarkGCCard(GpuRegister object, GpuRegister value); + + // Register allocation. + + void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; + // AllocateFreeRegister() is only used when allocating registers locally + // during CompileBaseline(). 
+ Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; + + Location GetStackLocation(HLoadLocal* load) const OVERRIDE; + + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id); + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id); + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id); + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id); + + void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + + InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kMips64; } + + const Mips64InstructionSetFeatures& GetInstructionSetFeatures() const { + return isa_features_; + } + + Label* GetLabelOf(HBasicBlock* block) const { + return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + } + + void Initialize() OVERRIDE { + block_labels_.SetSize(GetGraph()->GetBlocks().Size()); + } + + void Finalize(CodeAllocator* allocator) OVERRIDE; + + // Code generation helpers. + + void MoveLocation(Location destination, Location source, Primitive::Type type); + + void SwapLocations(Location loc1, Location loc2, Primitive::Type type); + + // Generate code to invoke a runtime entry point. + void InvokeRuntime(int32_t offset, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path); + + ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; } + + bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const { return false; } + + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); + + private: + // Labels for each block that will be compiled. + GrowableArray<Label> block_labels_; + Label frame_entry_label_; + LocationsBuilderMIPS64 location_builder_; + InstructionCodeGeneratorMIPS64 instruction_visitor_; + ParallelMoveResolverMIPS64 move_resolver_; + Mips64Assembler assembler_; + const Mips64InstructionSetFeatures& isa_features_; + + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS64); +}; + +} // namespace mips64 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_ diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index e39a1c2bd5..4d106c4fa4 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -56,6 +56,8 @@ class NullCheckSlowPathX86 : public SlowPathCodeX86 { RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } + const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86"; } + private: HNullCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86); @@ -71,6 +73,8 @@ class DivZeroCheckSlowPathX86 : public SlowPathCodeX86 { RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } + const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86"; } + private: HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86); @@ -90,6 +94,8 @@ class DivRemMinusOneSlowPathX86 : public SlowPathCodeX86 { __ jmp(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86"; } + private: Register reg_; bool is_div_; @@ -122,6 +128,8 @@ class BoundsCheckSlowPathX86 : public SlowPathCodeX86 { RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } + const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86"; } + private: HBoundsCheck* const instruction_; const Location 
index_location_; @@ -158,6 +166,8 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 { return successor_; } + const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86"; } + private: HSuspendCheck* const instruction_; HBasicBlock* const successor_; @@ -188,6 +198,8 @@ class LoadStringSlowPathX86 : public SlowPathCodeX86 { __ jmp(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86"; } + private: HLoadString* const instruction_; @@ -228,6 +240,8 @@ class LoadClassSlowPathX86 : public SlowPathCodeX86 { __ jmp(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86"; } + private: // The class this slow path will load. HLoadClass* const cls_; @@ -293,6 +307,8 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 { __ jmp(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86"; } + private: HInstruction* const instruction_; const Location class_to_check_; @@ -318,6 +334,8 @@ class DeoptimizationSlowPathX86 : public SlowPathCodeX86 { codegen->RecordPcInfo(instruction_, dex_pc, this); } + const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } + private: HInstruction* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 696d8d549e..1ad89c9bfc 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -245,6 +245,10 @@ class CodeGeneratorX86 : public CodeGenerator { return &assembler_; } + const X86Assembler& GetAssembler() const OVERRIDE { + return assembler_; + } + uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE { return GetLabelOf(block)->Position(); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 4da5bc4e51..a8f57cc9ac 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -59,6 +59,8 @@ class NullCheckSlowPathX86_64 : public SlowPathCodeX86_64 { RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } + const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; } + private: HNullCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64); @@ -75,6 +77,8 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCodeX86_64 { RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } + const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; } + private: HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64); @@ -105,6 +109,8 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 { __ jmp(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; } + private: const CpuRegister cpu_reg_; const Primitive::Type type_; @@ -140,6 +146,8 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 { return successor_; } + const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; } + private: HSuspendCheck* const instruction_; HBasicBlock* const successor_; @@ -174,6 +182,8 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCodeX86_64 { RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } + const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; } + private: HBoundsCheck* const instruction_; const 
Location index_location_; @@ -217,6 +227,8 @@ class LoadClassSlowPathX86_64 : public SlowPathCodeX86_64 { __ jmp(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; } + private: // The class this slow path will load. HLoadClass* const cls_; @@ -257,6 +269,8 @@ class LoadStringSlowPathX86_64 : public SlowPathCodeX86_64 { __ jmp(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; } + private: HLoadString* const instruction_; @@ -312,6 +326,8 @@ class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 { __ jmp(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; } + private: HInstruction* const instruction_; const Location class_to_check_; @@ -337,6 +353,8 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCodeX86_64 { codegen->RecordPcInfo(instruction_, dex_pc, this); } + const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } + private: HInstruction* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); @@ -373,7 +391,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo if (invoke->IsStringInit()) { CpuRegister reg = temp.AsRegister<CpuRegister>(); // temp = thread->string_init_entrypoint - __ gs()->movl(reg, Address::Absolute(invoke->GetStringInitOffset())); + __ gs()->movq(reg, Address::Absolute(invoke->GetStringInitOffset(), true)); // (temp + offset_of_quick_compiled_code)() __ call(Address(reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset( kX86_64WordSize).SizeValue())); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 215754cd46..a18e89a3e7 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -245,6 +245,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { return &assembler_; } + const X86_64Assembler& GetAssembler() const OVERRIDE { + return assembler_; + } + ParallelMoveResolverX86_64* GetMoveResolver() OVERRIDE { return &move_resolver_; } diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index bfed1a89de..ca85cf5fae 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -20,6 +20,8 @@ #include "arch/arm/instruction_set_features_arm.h" #include "arch/arm/registers_arm.h" #include "arch/arm64/instruction_set_features_arm64.h" +#include "arch/mips64/instruction_set_features_mips64.h" +#include "arch/mips64/registers_mips64.h" #include "arch/x86/instruction_set_features_x86.h" #include "arch/x86/registers_x86.h" #include "arch/x86_64/instruction_set_features_x86_64.h" @@ -27,6 +29,7 @@ #include "builder.h" #include "code_generator_arm.h" #include "code_generator_arm64.h" +#include "code_generator_mips64.h" #include "code_generator_x86.h" #include "code_generator_x86_64.h" #include "common_compiler_test.h" @@ -40,6 +43,7 @@ #include "ssa_liveness_analysis.h" #include "utils.h" #include "utils/arm/managed_register_arm.h" +#include "utils/mips64/managed_register_mips64.h" #include "utils/x86/managed_register_x86.h" #include "gtest/gtest.h" @@ -172,6 +176,14 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { if (kRuntimeISA == kArm64) { Run(allocator, codegenARM64, has_result, expected); } + + std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( + Mips64InstructionSetFeatures::FromCppDefines()); + 
mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options); + codegenMIPS64.CompileBaseline(&allocator, true); + if (kRuntimeISA == kMips64) { + Run(allocator, codegenMIPS64, has_result, expected); + } } template <typename Expected> @@ -219,6 +231,11 @@ static void RunCodeOptimized(HGraph* graph, X86_64InstructionSetFeatures::FromCppDefines()); x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected); + } else if (kRuntimeISA == kMips64) { + std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( + Mips64InstructionSetFeatures::FromCppDefines()); + mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options); + RunCodeOptimized(&codegenMIPS64, graph, hook_before_codegen, has_result, expected); } } diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 422223f5e0..11f6362294 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -627,8 +627,8 @@ TEST(ConstantFolding, ConstantCondition) { " 9: If(8)\n" "BasicBlock 2, pred: 1, succ: 3\n" " 12: Goto 3\n" - "BasicBlock 3, pred: 2, 5, succ: 4\n" - " 22: Phi(3, 5) [15]\n" + "BasicBlock 3, pred: 5, 2, succ: 4\n" + " 22: Phi(5, 3) [15]\n" " 15: Add(22, 3)\n" " 17: ReturnVoid\n" "BasicBlock 4, pred: 3\n" diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index 3209d3eb18..ee3a61aa0c 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -89,8 +89,8 @@ TEST(DeadCodeElimination, AdditionAndConditionalJump) { " 9: If(8)\n" "BasicBlock 2, pred: 1, succ: 3\n" " 12: Goto 3\n" - "BasicBlock 3, pred: 2, 5, succ: 4\n" - " 22: Phi(3, 5) [15]\n" + "BasicBlock 3, pred: 5, 2, succ: 4\n" + " 22: Phi(5, 3) [15]\n" " 15: Add(22, 3)\n" " 17: ReturnVoid\n" "BasicBlock 4, pred: 3\n" @@ -101,7 +101,7 @@ TEST(DeadCodeElimination, AdditionAndConditionalJump) { // Expected difference after dead code elimination. 
diff_t expected_diff = { { " 3: IntConstant [15, 22, 8]\n", " 3: IntConstant [22, 8]\n" }, - { " 22: Phi(3, 5) [15]\n", " 22: Phi(3, 5)\n" }, + { " 22: Phi(5, 3) [15]\n", " 22: Phi(5, 3)\n" }, { " 15: Add(22, 3)\n", removed } }; std::string expected_after = Patch(expected_before, expected_diff); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index b64791788d..7d723ef13d 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -16,17 +16,21 @@ #include "graph_visualizer.h" +#include <dlfcn.h> + +#include <cctype> +#include <sstream> + #include "code_generator.h" #include "dead_code_elimination.h" +#include "disassembler.h" #include "licm.h" #include "nodes.h" #include "optimization.h" #include "reference_type_propagation.h" #include "register_allocator.h" #include "ssa_liveness_analysis.h" - -#include <cctype> -#include <sstream> +#include "utils/assembler.h" namespace art { @@ -87,6 +91,64 @@ std::ostream& operator<<(std::ostream& os, const StringList& list) { } } +typedef Disassembler* create_disasm_prototype(InstructionSet instruction_set, + DisassemblerOptions* options); +class HGraphVisualizerDisassembler { + public: + HGraphVisualizerDisassembler(InstructionSet instruction_set, const uint8_t* base_address) + : instruction_set_(instruction_set), disassembler_(nullptr) { + libart_disassembler_handle_ = + dlopen(kIsDebugBuild ? "libartd-disassembler.so" : "libart-disassembler.so", RTLD_NOW); + if (libart_disassembler_handle_ == nullptr) { + LOG(WARNING) << "Failed to dlopen libart-disassembler: " << dlerror(); + return; + } + create_disasm_prototype* create_disassembler = reinterpret_cast<create_disasm_prototype*>( + dlsym(libart_disassembler_handle_, "create_disassembler")); + if (create_disassembler == nullptr) { + LOG(WARNING) << "Could not find create_disassembler entry: " << dlerror(); + return; + } + // Reading the disassembly from 0x0 is easier, so we print relative + // addresses. We will only disassemble the code once everything has + // been generated, so we can read data in literal pools. + disassembler_ = std::unique_ptr<Disassembler>((*create_disassembler)( + instruction_set, + new DisassemblerOptions(/* absolute_addresses */ false, + base_address, + /* can_read_literals */ true))); + } + + ~HGraphVisualizerDisassembler() { + // We need to call ~Disassembler() before we close the library. + disassembler_.reset(); + if (libart_disassembler_handle_ != nullptr) { + dlclose(libart_disassembler_handle_); + } + } + + void Disassemble(std::ostream& output, size_t start, size_t end) const { + if (disassembler_ == nullptr) { + return; + } + + const uint8_t* base = disassembler_->GetDisassemblerOptions()->base_address_; + if (instruction_set_ == kThumb2) { + // ARM and Thumb-2 use the same disassembler. The bottom bit of the + // address is used to distinguish between the two. + base += 1; + } + disassembler_->Dump(output, base + start, base + end); + } + + private: + InstructionSet instruction_set_; + std::unique_ptr<Disassembler> disassembler_; + + void* libart_disassembler_handle_; +}; + + /** * HGraph visitor to generate a file suitable for the c1visualizer tool and IRHydra. 
*/ @@ -96,12 +158,19 @@ class HGraphVisualizerPrinter : public HGraphVisitor { std::ostream& output, const char* pass_name, bool is_after_pass, - const CodeGenerator& codegen) + const CodeGenerator& codegen, + const DisassemblyInformation* disasm_info = nullptr) : HGraphVisitor(graph), output_(output), pass_name_(pass_name), is_after_pass_(is_after_pass), codegen_(codegen), + disasm_info_(disasm_info), + disassembler_(disasm_info_ != nullptr + ? new HGraphVisualizerDisassembler( + codegen_.GetInstructionSet(), + codegen_.GetAssembler().CodeBufferBaseAddress()) + : nullptr), indent_(0) {} void StartTag(const char* name) { @@ -173,6 +242,9 @@ class HGraphVisualizerPrinter : public HGraphVisitor { HBasicBlock* predecessor = block->GetPredecessors().Get(i); output_ << " \"B" << predecessor->GetBlockId() << "\" "; } + if (block->IsEntryBlock() && (disasm_info_ != nullptr)) { + output_ << " \"" << kDisassemblyBlockFrameEntry << "\" "; + } output_<< std::endl; } @@ -183,6 +255,11 @@ class HGraphVisualizerPrinter : public HGraphVisitor { HBasicBlock* successor = block->GetSuccessors().Get(i); output_ << " \"B" << successor->GetBlockId() << "\" "; } + if (block->IsExitBlock() && + (disasm_info_ != nullptr) && + !disasm_info_->GetSlowPathIntervals().empty()) { + output_ << " \"" << kDisassemblyBlockSlowPaths << "\" "; + } output_<< std::endl; } @@ -266,9 +343,9 @@ class HGraphVisualizerPrinter : public HGraphVisitor { StartAttributeStream("kind") << barrier->GetBarrierKind(); } - void VisitLoadClass(HLoadClass* load_cass) OVERRIDE { + void VisitLoadClass(HLoadClass* load_class) OVERRIDE { StartAttributeStream("gen_clinit_check") << std::boolalpha - << load_cass->MustGenerateClinitCheck() << std::noboolalpha; + << load_class->MustGenerateClinitCheck() << std::noboolalpha; } void VisitCheckCast(HCheckCast* check_cast) OVERRIDE { @@ -359,9 +436,13 @@ class HGraphVisualizerPrinter : public HGraphVisitor { && is_after_pass_) { if (instruction->GetType() == Primitive::kPrimNot) { if (instruction->IsLoadClass()) { + ReferenceTypeInfo info = instruction->AsLoadClass()->GetLoadedClassRTI(); ScopedObjectAccess soa(Thread::Current()); - StartAttributeStream("klass") - << PrettyClass(instruction->AsLoadClass()->GetLoadedClassRTI().GetTypeHandle().Get()); + if (info.GetTypeHandle().GetReference() != nullptr) { + StartAttributeStream("klass") << info.GetTypeHandle().Get(); + } else { + StartAttributeStream("klass") << "unresolved"; + } } else { ReferenceTypeInfo info = instruction->GetReferenceTypeInfo(); if (info.IsTop()) { @@ -374,10 +455,20 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } } } + if (disasm_info_ != nullptr) { + DCHECK(disassembler_ != nullptr); + // If the information is available, disassemble the code generated for + // this instruction. 
+ auto it = disasm_info_->GetInstructionIntervals().find(instruction); + if (it != disasm_info_->GetInstructionIntervals().end() + && it->second.start != it->second.end) { + output_ << std::endl; + disassembler_->Disassemble(output_, it->second.start, it->second.end); + } + } } void PrintInstructions(const HInstructionList& list) { - const char* kEndInstructionMarker = "<|@"; for (HInstructionIterator it(list); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); int bci = 0; @@ -395,11 +486,83 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } } + void DumpStartOfDisassemblyBlock(const char* block_name, + int predecessor_index, + int successor_index) { + StartTag("block"); + PrintProperty("name", block_name); + PrintInt("from_bci", -1); + PrintInt("to_bci", -1); + if (predecessor_index != -1) { + PrintProperty("predecessors", "B", predecessor_index); + } else { + PrintEmptyProperty("predecessors"); + } + if (successor_index != -1) { + PrintProperty("successors", "B", successor_index); + } else { + PrintEmptyProperty("successors"); + } + PrintEmptyProperty("xhandlers"); + PrintEmptyProperty("flags"); + StartTag("states"); + StartTag("locals"); + PrintInt("size", 0); + PrintProperty("method", "None"); + EndTag("locals"); + EndTag("states"); + StartTag("HIR"); + } + + void DumpEndOfDisassemblyBlock() { + EndTag("HIR"); + EndTag("block"); + } + + void DumpDisassemblyBlockForFrameEntry() { + DumpStartOfDisassemblyBlock(kDisassemblyBlockFrameEntry, + -1, + GetGraph()->GetEntryBlock()->GetBlockId()); + output_ << " 0 0 disasm " << kDisassemblyBlockFrameEntry << " "; + GeneratedCodeInterval frame_entry = disasm_info_->GetFrameEntryInterval(); + if (frame_entry.start != frame_entry.end) { + output_ << std::endl; + disassembler_->Disassemble(output_, frame_entry.start, frame_entry.end); + } + output_ << kEndInstructionMarker << std::endl; + DumpEndOfDisassemblyBlock(); + } + + void DumpDisassemblyBlockForSlowPaths() { + if (disasm_info_->GetSlowPathIntervals().empty()) { + return; + } + // If the graph has an exit block we attach the block for the slow paths + // after it. Else we just add the block to the graph without linking it to + // any other. + DumpStartOfDisassemblyBlock( + kDisassemblyBlockSlowPaths, + GetGraph()->HasExitBlock() ? GetGraph()->GetExitBlock()->GetBlockId() : -1, + -1); + for (SlowPathCodeInfo info : disasm_info_->GetSlowPathIntervals()) { + output_ << " 0 0 disasm " << info.slow_path->GetDescription() << std::endl; + disassembler_->Disassemble(output_, info.code_interval.start, info.code_interval.end); + output_ << kEndInstructionMarker << std::endl; + } + DumpEndOfDisassemblyBlock(); + } + void Run() { StartTag("cfg"); std::string pass_desc = std::string(pass_name_) + (is_after_pass_ ? 
" (after)" : " (before)"); PrintProperty("name", pass_desc.c_str()); + if (disasm_info_ != nullptr) { + DumpDisassemblyBlockForFrameEntry(); + } VisitInsertionOrder(); + if (disasm_info_ != nullptr) { + DumpDisassemblyBlockForSlowPaths(); + } EndTag("cfg"); } @@ -446,11 +609,17 @@ class HGraphVisualizerPrinter : public HGraphVisitor { EndTag("block"); } + static constexpr const char* const kEndInstructionMarker = "<|@"; + static constexpr const char* const kDisassemblyBlockFrameEntry = "FrameEntry"; + static constexpr const char* const kDisassemblyBlockSlowPaths = "SlowPaths"; + private: std::ostream& output_; const char* pass_name_; const bool is_after_pass_; const CodeGenerator& codegen_; + const DisassemblyInformation* disasm_info_; + std::unique_ptr<HGraphVisualizerDisassembler> disassembler_; size_t indent_; DISALLOW_COPY_AND_ASSIGN(HGraphVisualizerPrinter); @@ -479,4 +648,13 @@ void HGraphVisualizer::DumpGraph(const char* pass_name, bool is_after_pass) cons } } +void HGraphVisualizer::DumpGraphWithDisassembly() const { + DCHECK(output_ != nullptr); + if (!graph_->GetBlocks().IsEmpty()) { + HGraphVisualizerPrinter printer( + graph_, *output_, "disassembly", true, codegen_, codegen_.GetDisassemblyInformation()); + printer.Run(); + } +} + } // namespace art diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h index 513bceb369..b6b66df601 100644 --- a/compiler/optimizing/graph_visualizer.h +++ b/compiler/optimizing/graph_visualizer.h @@ -19,6 +19,8 @@ #include <ostream> +#include "arch/instruction_set.h" +#include "base/arena_containers.h" #include "base/value_object.h" namespace art { @@ -26,11 +28,75 @@ namespace art { class CodeGenerator; class DexCompilationUnit; class HGraph; +class HInstruction; +class SlowPathCode; /** * This class outputs the HGraph in the C1visualizer format. * Note: Currently only works if the compiler is single threaded. */ +struct GeneratedCodeInterval { + size_t start; + size_t end; +}; + +struct SlowPathCodeInfo { + const SlowPathCode* slow_path; + GeneratedCodeInterval code_interval; +}; + +// This information is filled by the code generator. It will be used by the +// graph visualizer to associate disassembly of the generated code with the +// instructions and slow paths. 
We assume that the generated code follows the +// following structure: +// - frame entry +// - instructions +// - slow paths +class DisassemblyInformation { + public: + explicit DisassemblyInformation(ArenaAllocator* allocator) + : frame_entry_interval_({0, 0}), + instruction_intervals_(std::less<const HInstruction*>(), allocator->Adapter()), + slow_path_intervals_(allocator->Adapter()) {} + + void SetFrameEntryInterval(size_t start, size_t end) { + frame_entry_interval_ = {start, end}; + } + + void AddInstructionInterval(HInstruction* instr, size_t start, size_t end) { + instruction_intervals_.Put(instr, {start, end}); + } + + void AddSlowPathInterval(SlowPathCode* slow_path, size_t start, size_t end) { + slow_path_intervals_.push_back({slow_path, {start, end}}); + } + + GeneratedCodeInterval GetFrameEntryInterval() const { + return frame_entry_interval_; + } + + GeneratedCodeInterval* GetFrameEntryInterval() { + return &frame_entry_interval_; + } + + const ArenaSafeMap<const HInstruction*, GeneratedCodeInterval>& GetInstructionIntervals() const { + return instruction_intervals_; + } + + ArenaSafeMap<const HInstruction*, GeneratedCodeInterval>* GetInstructionIntervals() { + return &instruction_intervals_; + } + + const ArenaVector<SlowPathCodeInfo>& GetSlowPathIntervals() const { return slow_path_intervals_; } + + ArenaVector<SlowPathCodeInfo>* GetSlowPathIntervals() { return &slow_path_intervals_; } + + private: + GeneratedCodeInterval frame_entry_interval_; + ArenaSafeMap<const HInstruction*, GeneratedCodeInterval> instruction_intervals_; + ArenaVector<SlowPathCodeInfo> slow_path_intervals_; +}; + class HGraphVisualizer : public ValueObject { public: HGraphVisualizer(std::ostream* output, @@ -39,6 +105,7 @@ class HGraphVisualizer : public ValueObject { void PrintHeader(const char* method_name) const; void DumpGraph(const char* pass_name, bool is_after_pass = true) const; + void DumpGraphWithDisassembly() const; private: std::ostream* const output_; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 2daeeb3c0c..e375f7b1d3 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -312,17 +312,23 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { HInstruction* input_value = equal->GetLeastConstantLeft(); if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) { HBasicBlock* block = equal->GetBlock(); + // We are comparing the boolean to a constant which is of type int and can + // be any constant. 
if (input_const->AsIntConstant()->IsOne()) { // Replace (bool_value == true) with bool_value equal->ReplaceWith(input_value); block->RemoveInstruction(equal); RecordSimplification(); - } else { + } else if (input_const->AsIntConstant()->IsZero()) { // Replace (bool_value == false) with !bool_value - DCHECK(input_const->AsIntConstant()->IsZero()); block->ReplaceAndRemoveInstructionWith( equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value)); RecordSimplification(); + } else { + // Replace (bool_value == integer_not_zero_nor_one_constant) with false + equal->ReplaceWith(GetGraph()->GetIntConstant(0)); + block->RemoveInstruction(equal); + RecordSimplification(); } } } @@ -334,17 +340,23 @@ void InstructionSimplifierVisitor::VisitNotEqual(HNotEqual* not_equal) { HInstruction* input_value = not_equal->GetLeastConstantLeft(); if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) { HBasicBlock* block = not_equal->GetBlock(); + // We are comparing the boolean to a constant which is of type int and can + // be any constant. if (input_const->AsIntConstant()->IsOne()) { // Replace (bool_value != true) with !bool_value block->ReplaceAndRemoveInstructionWith( not_equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value)); RecordSimplification(); - } else { + } else if (input_const->AsIntConstant()->IsZero()) { // Replace (bool_value != false) with bool_value - DCHECK(input_const->AsIntConstant()->IsZero()); not_equal->ReplaceWith(input_value); block->RemoveInstruction(not_equal); RecordSimplification(); + } else { + // Replace (bool_value != integer_not_zero_nor_one_constant) with true + not_equal->ReplaceWith(GetGraph()->GetIntConstant(1)); + block->RemoveInstruction(not_equal); + RecordSimplification(); } } } diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 749bedf99e..71fadfbcc2 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -121,6 +121,8 @@ class IntrinsicSlowPathARM : public SlowPathCodeARM { __ b(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM"; } + private: // The instruction where this slow path is happening. HInvoke* const invoke_; diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index c108ad5daa..8bcb88b4ea 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -130,6 +130,8 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM64"; } + private: // The instruction where this slow path is happening. HInvoke* const invoke_; diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 424ac7c855..b04cc5cace 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -158,6 +158,8 @@ class IntrinsicSlowPathX86 : public SlowPathCodeX86 { __ jmp(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathX86"; } + private: // The instruction where this slow path is happening. 
HInvoke* const invoke_; diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 891531435e..888c7b8037 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -149,6 +149,8 @@ class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 { __ jmp(GetExitLabel()); } + const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathX86_64"; } + private: // The instruction where this slow path is happening. HInvoke* const invoke_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 68c197e607..01eb2d7f86 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -180,8 +180,9 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { HBasicBlock* new_block = new (arena_) HBasicBlock(this, successor->GetDexPc()); AddBlock(new_block); new_block->AddInstruction(new (arena_) HGoto()); - block->ReplaceSuccessor(successor, new_block); - new_block->AddSuccessor(successor); + // Use `InsertBetween` to ensure the predecessor index and successor index of + // `block` and `successor` are preserved. + new_block->InsertBetween(block, successor); if (successor->IsLoopHeader()) { // If we split at a back edge boundary, make the new block the back edge. HLoopInformation* info = successor->GetLoopInformation(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 9443653db7..26eee1c52e 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -625,6 +625,20 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { predecessors_.Put(predecessor_index, new_block); } + // Insert `this` between `predecessor` and `successor`. This method + // preserves the indices, and will update the first edge found between + // `predecessor` and `successor`. 
+ void InsertBetween(HBasicBlock* predecessor, HBasicBlock* successor) { + size_t predecessor_index = successor->GetPredecessorIndexOf(predecessor); + DCHECK_NE(predecessor_index, static_cast<size_t>(-1)); + size_t successor_index = predecessor->GetSuccessorIndexOf(successor); + DCHECK_NE(successor_index, static_cast<size_t>(-1)); + successor->predecessors_.Put(predecessor_index, this); + predecessor->successors_.Put(successor_index, this); + successors_.Add(successor); + predecessors_.Add(predecessor); + } + void RemovePredecessor(HBasicBlock* block) { predecessors_.Delete(block); } @@ -2190,8 +2204,12 @@ class HCompare : public HBinaryOperation { kLtBias, // return -1 for NaN comparisons }; - HCompare(Primitive::Type type, HInstruction* first, HInstruction* second, Bias bias) - : HBinaryOperation(Primitive::kPrimInt, first, second), bias_(bias) { + HCompare(Primitive::Type type, + HInstruction* first, + HInstruction* second, + Bias bias, + uint32_t dex_pc) + : HBinaryOperation(Primitive::kPrimInt, first, second), bias_(bias), dex_pc_(dex_pc) { DCHECK_EQ(type, first->GetType()); DCHECK_EQ(type, second->GetType()); } @@ -2216,10 +2234,13 @@ class HCompare : public HBinaryOperation { bool IsGtBias() { return bias_ == kGtBias; } + uint32_t GetDexPc() const { return dex_pc_; } + DECLARE_INSTRUCTION(Compare); private: const Bias bias_; + const uint32_t dex_pc_; DISALLOW_COPY_AND_ASSIGN(HCompare); }; @@ -4027,6 +4048,8 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> { return source_.IsInvalid(); } + Primitive::Type GetType() const { return type_; } + bool Is64BitMove() const { return Primitive::Is64BitType(type_); } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 8d43adaada..0c7b6f7093 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -92,19 +92,21 @@ class PassInfoPrinter : public ValueObject { public: PassInfoPrinter(HGraph* graph, const char* method_name, - const CodeGenerator& codegen, + CodeGenerator* codegen, std::ostream* visualizer_output, CompilerDriver* compiler_driver) : method_name_(method_name), timing_logger_enabled_(compiler_driver->GetDumpPasses()), timing_logger_(method_name, true, true), + disasm_info_(graph->GetArena()), visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()), - visualizer_(visualizer_output, graph, codegen) { + visualizer_(visualizer_output, graph, *codegen) { if (strstr(method_name, kStringFilter) == nullptr) { timing_logger_enabled_ = visualizer_enabled_ = false; } if (visualizer_enabled_) { visualizer_.PrintHeader(method_name_); + codegen->SetDisassemblyInformation(&disasm_info_); } } @@ -115,6 +117,12 @@ class PassInfoPrinter : public ValueObject { } } + void DumpDisassembly() const { + if (visualizer_enabled_) { + visualizer_.DumpGraphWithDisassembly(); + } + } + private: void StartPass(const char* pass_name) { // Dump graph first, then start timer. @@ -141,6 +149,8 @@ class PassInfoPrinter : public ValueObject { bool timing_logger_enabled_; TimingLogger timing_logger_; + DisassemblyInformation disasm_info_; + bool visualizer_enabled_; HGraphVisualizer visualizer_; @@ -224,12 +234,13 @@ class OptimizingCompiler FINAL : public Compiler { CodeGenerator* codegen, CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit, - PassInfoPrinter* pass_info) const; + PassInfoPrinter* pass_info_printer) const; // Just compile without doing optimizations. 
CompiledMethod* CompileBaseline(CodeGenerator* codegen, CompilerDriver* driver, - const DexCompilationUnit& dex_compilation_unit) const; + const DexCompilationUnit& dex_compilation_unit, + PassInfoPrinter* pass_info_printer) const; std::unique_ptr<OptimizingCompilerStats> compilation_stats_; @@ -290,6 +301,7 @@ bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED, static bool IsInstructionSetSupported(InstructionSet instruction_set) { return instruction_set == kArm64 || (instruction_set == kThumb2 && !kArm32QuickCodeUseSoftFloat) + || instruction_set == kMips64 || instruction_set == kX86 || instruction_set == kX86_64; } @@ -428,7 +440,7 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, MaybeRecordStat(MethodCompilationStat::kCompiledOptimized); - return CompiledMethod::SwapAllocCompiledMethod( + CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), ArrayRef<const uint8_t>(allocator.GetMemory()), @@ -444,12 +456,15 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, ArrayRef<const uint8_t>(), // native_gc_map. ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const LinkerPatch>()); + pass_info_printer->DumpDisassembly(); + return compiled_method; } CompiledMethod* OptimizingCompiler::CompileBaseline( CodeGenerator* codegen, CompilerDriver* compiler_driver, - const DexCompilationUnit& dex_compilation_unit) const { + const DexCompilationUnit& dex_compilation_unit, + PassInfoPrinter* pass_info_printer) const { CodeVectorAllocator allocator; codegen->CompileBaseline(&allocator); @@ -465,7 +480,7 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); MaybeRecordStat(MethodCompilationStat::kCompiledBaseline); - return CompiledMethod::SwapAllocCompiledMethod( + CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), ArrayRef<const uint8_t>(allocator.GetMemory()), @@ -481,6 +496,8 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( AlignVectorSize(gc_map), ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const LinkerPatch>()); + pass_info_printer->DumpDisassembly(); + return compiled_method; } CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item, @@ -556,7 +573,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite PassInfoPrinter pass_info_printer(graph, method_name.c_str(), - *codegen.get(), + codegen.get(), visualizer_output_.get(), compiler_driver); @@ -616,7 +633,10 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite MaybeRecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator); } - return CompileBaseline(codegen.get(), compiler_driver, dex_compilation_unit); + return CompileBaseline(codegen.get(), + compiler_driver, + dex_compilation_unit, + &pass_info_printer); } else { return nullptr; } diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index a048c856c5..67840984b1 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -222,9 +222,10 @@ void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr, ClassLinker* cl = Runtime::Current()->GetClassLinker(); mirror::DexCache* dex_cache = cl->FindDexCache(info.GetDexFile()); ArtField* field = 
cl->GetResolvedField(info.GetFieldIndex(), dex_cache); - DCHECK(field != nullptr); - mirror::Class* klass = field->GetType<false>(); - SetClassAsTypeInfo(instr, klass, /* is_exact */ false); + if (field != nullptr) { + mirror::Class* klass = field->GetType<false>(); + SetClassAsTypeInfo(instr, klass, /* is_exact */ false); + } } void RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) { @@ -323,9 +324,10 @@ void RTPVisitor::VisitInvoke(HInvoke* instr) { mirror::DexCache* dex_cache = cl->FindDexCache(instr->GetDexFile()); ArtMethod* method = dex_cache->GetResolvedMethod( instr->GetDexMethodIndex(), cl->GetImagePointerSize()); - DCHECK(method != nullptr); - mirror::Class* klass = method->GetReturnType(false); - SetClassAsTypeInfo(instr, klass, /* is_exact */ false); + if (method != nullptr) { + mirror::Class* klass = method->GetReturnType(false); + SetClassAsTypeInfo(instr, klass, /* is_exact */ false); + } } void RTPVisitor::VisitArrayGet(HArrayGet* instr) { diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index e38e49cd19..7b23d020c2 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -80,6 +80,7 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UN InstructionSet instruction_set) { return instruction_set == kArm64 || instruction_set == kX86_64 + || instruction_set == kMips64 || instruction_set == kArm || instruction_set == kX86 || instruction_set == kThumb2; diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index fb3e7d798c..0e8c058002 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -115,7 +115,7 @@ TEST(SsaTest, CFG1) { " 3: If(2)\n" "BasicBlock 2, pred: 1, succ: 3\n" " 4: Goto\n" - "BasicBlock 3, pred: 2, 5, succ: 4\n" + "BasicBlock 3, pred: 5, 2, succ: 4\n" " 5: ReturnVoid\n" "BasicBlock 4, pred: 3\n" " 6: Exit\n" @@ -145,8 +145,8 @@ TEST(SsaTest, CFG2) { " 4: If(3)\n" "BasicBlock 2, pred: 1, succ: 3\n" " 5: Goto\n" - "BasicBlock 3, pred: 2, 5, succ: 4\n" - " 6: Phi(1, 0) [7]\n" + "BasicBlock 3, pred: 5, 2, succ: 4\n" + " 6: Phi(0, 1) [7]\n" " 7: Return(6)\n" "BasicBlock 4, pred: 3\n" " 8: Exit\n" @@ -428,8 +428,8 @@ TEST(SsaTest, Loop7) { " 10: Goto\n" "BasicBlock 5, pred: 3, succ: 2\n" " 11: Goto\n" - "BasicBlock 6, pred: 4, 8, succ: 7\n" - " 12: Phi(2, 5) [13]\n" + "BasicBlock 6, pred: 8, 4, succ: 7\n" + " 12: Phi(5, 2) [13]\n" " 13: Return(12)\n" "BasicBlock 7, pred: 6\n" " 14: Exit\n" @@ -480,7 +480,7 @@ TEST(SsaTest, LocalInIf) { " 4: If(3)\n" "BasicBlock 2, pred: 1, succ: 3\n" " 5: Goto\n" - "BasicBlock 3, pred: 2, 5, succ: 4\n" + "BasicBlock 3, pred: 5, 2, succ: 4\n" " 6: ReturnVoid\n" "BasicBlock 4, pred: 3\n" " 7: Exit\n" @@ -517,7 +517,7 @@ TEST(SsaTest, MultiplePredecessors) { " 8: Add(0, 0)\n" " 9: Goto\n" // This block should not get a phi for local 1. 
- "BasicBlock 5, pred: 2, 4, 7, succ: 6\n" + "BasicBlock 5, pred: 2, 7, 4, succ: 6\n" " 10: ReturnVoid\n" "BasicBlock 6, pred: 5\n" " 11: Exit\n" diff --git a/compiler/output_stream_test.cc b/compiler/output_stream_test.cc index fbc9d0d8fc..6104ccd758 100644 --- a/compiler/output_stream_test.cc +++ b/compiler/output_stream_test.cc @@ -47,11 +47,12 @@ class OutputStreamTest : public CommonRuntimeTest { CheckOffset(6); EXPECT_TRUE(output_stream_->WriteFully(buf, 4)); CheckOffset(10); + EXPECT_TRUE(output_stream_->WriteFully(buf, 6)); } void CheckTestOutput(const std::vector<uint8_t>& actual) { uint8_t expected[] = { - 0, 0, 1, 2, 0, 0, 1, 2, 3, 4 + 0, 0, 1, 2, 0, 0, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6 }; EXPECT_EQ(sizeof(expected), actual.size()); EXPECT_EQ(0, memcmp(expected, &actual[0], actual.size())); @@ -75,11 +76,13 @@ TEST_F(OutputStreamTest, File) { TEST_F(OutputStreamTest, Buffered) { ScratchFile tmp; - std::unique_ptr<FileOutputStream> file_output_stream(new FileOutputStream(tmp.GetFile())); - CHECK(file_output_stream.get() != nullptr); - BufferedOutputStream buffered_output_stream(file_output_stream.release()); - SetOutputStream(buffered_output_stream); - GenerateTestOutput(); + { + std::unique_ptr<FileOutputStream> file_output_stream(new FileOutputStream(tmp.GetFile())); + CHECK(file_output_stream.get() != nullptr); + BufferedOutputStream buffered_output_stream(file_output_stream.release()); + SetOutputStream(buffered_output_stream); + GenerateTestOutput(); + } std::unique_ptr<File> in(OS::OpenFileForReading(tmp.GetFilename().c_str())); EXPECT_TRUE(in.get() != nullptr); std::vector<uint8_t> actual(in->GetLength()); diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index eb8de0620b..077579c882 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -44,6 +44,10 @@ size_t Arm64Assembler::CodeSize() const { return vixl_masm_->BufferCapacity() - vixl_masm_->RemainingBufferSpace(); } +const uint8_t* Arm64Assembler::CodeBufferBaseAddress() const { + return vixl_masm_->GetStartAddress<uint8_t*>(); +} + void Arm64Assembler::FinalizeInstructions(const MemoryRegion& region) { // Copy the instructions from the buffer. MemoryRegion from(vixl_masm_->GetStartAddress<void*>(), CodeSize()); diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index b53c11bc24..db95537f93 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -77,7 +77,8 @@ class Arm64Assembler FINAL : public Assembler { void FinalizeCode() OVERRIDE; // Size of generated code. - size_t CodeSize() const; + size_t CodeSize() const OVERRIDE; + const uint8_t* CodeBufferBaseAddress() const OVERRIDE; // Copy instructions out of assembly buffer into the given region of memory. 
void FinalizeInstructions(const MemoryRegion& region); diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 0381af3956..ee2d594e6f 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -386,6 +386,7 @@ class Assembler { // Size of generated code virtual size_t CodeSize() const { return buffer_.Size(); } + virtual const uint8_t* CodeBufferBaseAddress() const { return buffer_.contents(); } // Copy instructions out of assembly buffer into the given region of memory virtual void FinalizeInstructions(const MemoryRegion& region) { diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index a8b55d1097..a192d2fdb6 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -25,9 +25,9 @@ namespace art { namespace mips64 { -void Mips64Assembler::Emit(int32_t value) { +void Mips64Assembler::Emit(uint32_t value) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); - buffer_.Emit<int32_t>(value); + buffer_.Emit<uint32_t>(value); } void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, @@ -35,124 +35,62 @@ void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegis CHECK_NE(rs, kNoGpuRegister); CHECK_NE(rt, kNoGpuRegister); CHECK_NE(rd, kNoGpuRegister); - int32_t encoding = opcode << kOpcodeShift | - static_cast<int32_t>(rs) << kRsShift | - static_cast<int32_t>(rt) << kRtShift | - static_cast<int32_t>(rd) << kRdShift | - shamt << kShamtShift | - funct; + uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | + static_cast<uint32_t>(rs) << kRsShift | + static_cast<uint32_t>(rt) << kRtShift | + static_cast<uint32_t>(rd) << kRdShift | + shamt << kShamtShift | + funct; Emit(encoding); } void Mips64Assembler::EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm) { CHECK_NE(rs, kNoGpuRegister); CHECK_NE(rt, kNoGpuRegister); - int32_t encoding = opcode << kOpcodeShift | - static_cast<int32_t>(rs) << kRsShift | - static_cast<int32_t>(rt) << kRtShift | - imm; + uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | + static_cast<uint32_t>(rs) << kRsShift | + static_cast<uint32_t>(rt) << kRtShift | + imm; Emit(encoding); } -void Mips64Assembler::EmitJ(int opcode, int address) { - int32_t encoding = opcode << kOpcodeShift | - address; +void Mips64Assembler::EmitI21(int opcode, GpuRegister rs, uint32_t imm21) { + CHECK_NE(rs, kNoGpuRegister); + uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | + static_cast<uint32_t>(rs) << kRsShift | + (imm21 & 0x1FFFFF); + Emit(encoding); +} + +void Mips64Assembler::EmitJ(int opcode, uint32_t addr26) { + uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | + (addr26 & 0x3FFFFFF); Emit(encoding); } void Mips64Assembler::EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, -int funct) { + int funct) { CHECK_NE(ft, kNoFpuRegister); CHECK_NE(fs, kNoFpuRegister); CHECK_NE(fd, kNoFpuRegister); - int32_t encoding = opcode << kOpcodeShift | - fmt << kFmtShift | - static_cast<int32_t>(ft) << kFtShift | - static_cast<int32_t>(fs) << kFsShift | - static_cast<int32_t>(fd) << kFdShift | - funct; + uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | + fmt << kFmtShift | + static_cast<uint32_t>(ft) << kFtShift | + static_cast<uint32_t>(fs) << kFsShift | + static_cast<uint32_t>(fd) << kFdShift | + funct; Emit(encoding); } -void Mips64Assembler::EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm) { 
- CHECK_NE(rt, kNoFpuRegister); - int32_t encoding = opcode << kOpcodeShift | - fmt << kFmtShift | - static_cast<int32_t>(rt) << kRtShift | - imm; +void Mips64Assembler::EmitFI(int opcode, int fmt, FpuRegister ft, uint16_t imm) { + CHECK_NE(ft, kNoFpuRegister); + uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | + fmt << kFmtShift | + static_cast<uint32_t>(ft) << kFtShift | + imm; Emit(encoding); } -void Mips64Assembler::EmitBranch(GpuRegister rt, GpuRegister rs, Label* label, bool equal) { - int offset; - if (label->IsBound()) { - offset = label->Position() - buffer_.Size(); - } else { - // Use the offset field of the branch instruction for linking the sites. - offset = label->position_; - label->LinkTo(buffer_.Size()); - } - if (equal) { - Beq(rt, rs, (offset >> 2) & kBranchOffsetMask); - } else { - Bne(rt, rs, (offset >> 2) & kBranchOffsetMask); - } -} - -void Mips64Assembler::EmitJump(Label* label, bool link) { - int offset; - if (label->IsBound()) { - offset = label->Position() - buffer_.Size(); - } else { - // Use the offset field of the jump instruction for linking the sites. - offset = label->position_; - label->LinkTo(buffer_.Size()); - } - if (link) { - Jal((offset >> 2) & kJumpOffsetMask); - } else { - J((offset >> 2) & kJumpOffsetMask); - } -} - -int32_t Mips64Assembler::EncodeBranchOffset(int offset, int32_t inst, bool is_jump) { - CHECK_ALIGNED(offset, 4); - CHECK(IsInt<POPCOUNT(kBranchOffsetMask)>(offset)) << offset; - - // Properly preserve only the bits supported in the instruction. - offset >>= 2; - if (is_jump) { - offset &= kJumpOffsetMask; - return (inst & ~kJumpOffsetMask) | offset; - } else { - offset &= kBranchOffsetMask; - return (inst & ~kBranchOffsetMask) | offset; - } -} - -int Mips64Assembler::DecodeBranchOffset(int32_t inst, bool is_jump) { - // Sign-extend, then left-shift by 2. - if (is_jump) { - return (((inst & kJumpOffsetMask) << 6) >> 4); - } else { - return (((inst & kBranchOffsetMask) << 16) >> 14); - } -} - -void Mips64Assembler::Bind(Label* label, bool is_jump) { - CHECK(!label->IsBound()); - int bound_pc = buffer_.Size(); - while (label->IsLinked()) { - int32_t position = label->Position(); - int32_t next = buffer_.Load<int32_t>(position); - int32_t offset = is_jump ? 
bound_pc - position : bound_pc - position - 4; - int32_t encoded = Mips64Assembler::EncodeBranchOffset(offset, next, is_jump); - buffer_.Store<int32_t>(position, encoded); - label->position_ = Mips64Assembler::DecodeBranchOffset(next, is_jump); - } - label->BindTo(bound_pc); -} - void Mips64Assembler::Add(GpuRegister rd, GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, rd, 0, 0x20); } @@ -169,6 +107,10 @@ void Mips64Assembler::Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0x9, rs, rt, imm16); } +void Mips64Assembler::Daddu(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x2d); +} + void Mips64Assembler::Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0x19, rs, rt, imm16); } @@ -181,22 +123,90 @@ void Mips64Assembler::Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, rd, 0, 0x23); } -void Mips64Assembler::Mult(GpuRegister rs, GpuRegister rt) { +void Mips64Assembler::Dsubu(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x2f); +} + +void Mips64Assembler::MultR2(GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x18); } -void Mips64Assembler::Multu(GpuRegister rs, GpuRegister rt) { +void Mips64Assembler::MultuR2(GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x19); } -void Mips64Assembler::Div(GpuRegister rs, GpuRegister rt) { +void Mips64Assembler::DivR2(GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1a); } -void Mips64Assembler::Divu(GpuRegister rs, GpuRegister rt) { +void Mips64Assembler::DivuR2(GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1b); } +void Mips64Assembler::MulR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0x1c, rs, rt, rd, 0, 2); +} + +void Mips64Assembler::DivR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + DivR2(rs, rt); + Mflo(rd); +} + +void Mips64Assembler::ModR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + DivR2(rs, rt); + Mfhi(rd); +} + +void Mips64Assembler::DivuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + DivuR2(rs, rt); + Mflo(rd); +} + +void Mips64Assembler::ModuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + DivuR2(rs, rt); + Mfhi(rd); +} + +void Mips64Assembler::MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 2, 0x18); +} + +void Mips64Assembler::DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 2, 0x1a); +} + +void Mips64Assembler::ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 3, 0x1a); +} + +void Mips64Assembler::DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 2, 0x1b); +} + +void Mips64Assembler::ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 3, 0x1b); +} + +void Mips64Assembler::Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 2, 0x1c); +} + +void Mips64Assembler::Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 2, 0x1e); +} + +void Mips64Assembler::Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 3, 0x1e); +} + +void Mips64Assembler::Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 2, 0x1f); +} + +void Mips64Assembler::Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 3, 0x1f); +} + void Mips64Assembler::And(GpuRegister rd, GpuRegister rs, GpuRegister rt) { 
EmitR(0, rs, rt, rd, 0, 0x24); } @@ -225,30 +235,80 @@ void Mips64Assembler::Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, rd, 0, 0x27); } -void Mips64Assembler::Sll(GpuRegister rd, GpuRegister rs, int shamt) { - EmitR(0, rs, static_cast<GpuRegister>(0), rd, shamt, 0x00); +void Mips64Assembler::Seb(GpuRegister rd, GpuRegister rt) { + EmitR(0x1f, static_cast<GpuRegister>(0), rt, rd, 0x10, 0x20); } -void Mips64Assembler::Srl(GpuRegister rd, GpuRegister rs, int shamt) { - EmitR(0, rs, static_cast<GpuRegister>(0), rd, shamt, 0x02); +void Mips64Assembler::Seh(GpuRegister rd, GpuRegister rt) { + EmitR(0x1f, static_cast<GpuRegister>(0), rt, rd, 0x18, 0x20); } -void Mips64Assembler::Sra(GpuRegister rd, GpuRegister rs, int shamt) { - EmitR(0, rs, static_cast<GpuRegister>(0), rd, shamt, 0x03); +void Mips64Assembler::Dext(GpuRegister rt, GpuRegister rs, int pos, int size_less_one) { + DCHECK(0 <= pos && pos < 32) << pos; + DCHECK(0 <= size_less_one && size_less_one < 32) << size_less_one; + EmitR(0x1f, rs, rt, static_cast<GpuRegister>(size_less_one), pos, 3); } -void Mips64Assembler::Sllv(GpuRegister rd, GpuRegister rs, GpuRegister rt) { +void Mips64Assembler::Sll(GpuRegister rd, GpuRegister rt, int shamt) { + EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x00); +} + +void Mips64Assembler::Srl(GpuRegister rd, GpuRegister rt, int shamt) { + EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x02); +} + +void Mips64Assembler::Sra(GpuRegister rd, GpuRegister rt, int shamt) { + EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x03); +} + +void Mips64Assembler::Sllv(GpuRegister rd, GpuRegister rt, GpuRegister rs) { EmitR(0, rs, rt, rd, 0, 0x04); } -void Mips64Assembler::Srlv(GpuRegister rd, GpuRegister rs, GpuRegister rt) { +void Mips64Assembler::Srlv(GpuRegister rd, GpuRegister rt, GpuRegister rs) { EmitR(0, rs, rt, rd, 0, 0x06); } -void Mips64Assembler::Srav(GpuRegister rd, GpuRegister rs, GpuRegister rt) { +void Mips64Assembler::Srav(GpuRegister rd, GpuRegister rt, GpuRegister rs) { EmitR(0, rs, rt, rd, 0, 0x07); } +void Mips64Assembler::Dsll(GpuRegister rd, GpuRegister rt, int shamt) { + EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x38); +} + +void Mips64Assembler::Dsrl(GpuRegister rd, GpuRegister rt, int shamt) { + EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3a); +} + +void Mips64Assembler::Dsra(GpuRegister rd, GpuRegister rt, int shamt) { + EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3b); +} + +void Mips64Assembler::Dsll32(GpuRegister rd, GpuRegister rt, int shamt) { + EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3c); +} + +void Mips64Assembler::Dsrl32(GpuRegister rd, GpuRegister rt, int shamt) { + EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3e); +} + +void Mips64Assembler::Dsra32(GpuRegister rd, GpuRegister rt, int shamt) { + EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3f); +} + +void Mips64Assembler::Dsllv(GpuRegister rd, GpuRegister rt, GpuRegister rs) { + EmitR(0, rs, rt, rd, 0, 0x14); +} + +void Mips64Assembler::Dsrlv(GpuRegister rd, GpuRegister rt, GpuRegister rs) { + EmitR(0, rs, rt, rd, 0, 0x16); +} + +void Mips64Assembler::Dsrav(GpuRegister rd, GpuRegister rt, GpuRegister rs) { + EmitR(0, rs, rt, rd, 0, 0x17); +} + void Mips64Assembler::Lb(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0x20, rs, rt, imm16); } @@ -281,6 +341,19 @@ void Mips64Assembler::Lui(GpuRegister rt, uint16_t imm16) { EmitI(0xf, static_cast<GpuRegister>(0), rt, imm16); } +void Mips64Assembler::Dahi(GpuRegister 
rs, uint16_t imm16) { + EmitI(1, rs, static_cast<GpuRegister>(6), imm16); +} + +void Mips64Assembler::Dati(GpuRegister rs, uint16_t imm16) { + EmitI(1, rs, static_cast<GpuRegister>(0x1e), imm16); +} + +void Mips64Assembler::Sync(uint32_t stype) { + EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), + static_cast<GpuRegister>(0), stype & 0x1f, 0xf); +} + void Mips64Assembler::Mfhi(GpuRegister rd) { EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x10); } @@ -321,34 +394,121 @@ void Mips64Assembler::Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0xb, rs, rt, imm16); } -void Mips64Assembler::Beq(GpuRegister rt, GpuRegister rs, uint16_t imm16) { +void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) { EmitI(0x4, rs, rt, imm16); Nop(); } -void Mips64Assembler::Bne(GpuRegister rt, GpuRegister rs, uint16_t imm16) { +void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) { EmitI(0x5, rs, rt, imm16); Nop(); } -void Mips64Assembler::J(uint32_t address) { - EmitJ(0x2, address); +void Mips64Assembler::J(uint32_t addr26) { + EmitJ(0x2, addr26); Nop(); } -void Mips64Assembler::Jal(uint32_t address) { - EmitJ(0x2, address); +void Mips64Assembler::Jal(uint32_t addr26) { + EmitJ(0x3, addr26); Nop(); } -void Mips64Assembler::Jr(GpuRegister rs) { - EmitR(0, rs, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), 0, 0x09); // Jalr zero, rs +void Mips64Assembler::Jalr(GpuRegister rd, GpuRegister rs) { + EmitR(0, rs, static_cast<GpuRegister>(0), rd, 0, 0x09); Nop(); } void Mips64Assembler::Jalr(GpuRegister rs) { - EmitR(0, rs, static_cast<GpuRegister>(0), RA, 0, 0x09); - Nop(); + Jalr(RA, rs); +} + +void Mips64Assembler::Jr(GpuRegister rs) { + Jalr(ZERO, rs); +} + +void Mips64Assembler::Auipc(GpuRegister rs, uint16_t imm16) { + EmitI(0x3B, rs, static_cast<GpuRegister>(0x1E), imm16); +} + +void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) { + EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16); +} + +void Mips64Assembler::Jialc(GpuRegister rt, uint16_t imm16) { + EmitI(0x3E, static_cast<GpuRegister>(0), rt, imm16); +} + +void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16) { + CHECK_NE(rs, ZERO); + CHECK_NE(rt, ZERO); + CHECK_NE(rs, rt); + EmitI(0x17, rs, rt, imm16); +} + +void Mips64Assembler::Bltzc(GpuRegister rt, uint16_t imm16) { + CHECK_NE(rt, ZERO); + EmitI(0x17, rt, rt, imm16); +} + +void Mips64Assembler::Bgtzc(GpuRegister rt, uint16_t imm16) { + CHECK_NE(rt, ZERO); + EmitI(0x17, static_cast<GpuRegister>(0), rt, imm16); +} + +void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16) { + CHECK_NE(rs, ZERO); + CHECK_NE(rt, ZERO); + CHECK_NE(rs, rt); + EmitI(0x16, rs, rt, imm16); +} + +void Mips64Assembler::Bgezc(GpuRegister rt, uint16_t imm16) { + CHECK_NE(rt, ZERO); + EmitI(0x16, rt, rt, imm16); +} + +void Mips64Assembler::Blezc(GpuRegister rt, uint16_t imm16) { + CHECK_NE(rt, ZERO); + EmitI(0x16, static_cast<GpuRegister>(0), rt, imm16); +} + +void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16) { + CHECK_NE(rs, ZERO); + CHECK_NE(rt, ZERO); + CHECK_NE(rs, rt); + EmitI(0x7, rs, rt, imm16); +} + +void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16) { + CHECK_NE(rs, ZERO); + CHECK_NE(rt, ZERO); + CHECK_NE(rs, rt); + EmitI(0x6, rs, rt, imm16); +} + +void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16) { + CHECK_NE(rs, ZERO); + CHECK_NE(rt, ZERO); + CHECK_NE(rs, rt); + EmitI(0x8, (rs < rt) ? 
rs : rt, (rs < rt) ? rt : rs, imm16); +} + +void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16) { + CHECK_NE(rs, ZERO); + CHECK_NE(rt, ZERO); + CHECK_NE(rs, rt); + EmitI(0x18, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16); +} + +void Mips64Assembler::Beqzc(GpuRegister rs, uint32_t imm21) { + CHECK_NE(rs, ZERO); + EmitI21(0x36, rs, imm21); +} + +void Mips64Assembler::Bnezc(GpuRegister rs, uint32_t imm21) { + CHECK_NE(rs, ZERO); + EmitI21(0x3E, rs, imm21); } void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { @@ -368,23 +528,19 @@ void Mips64Assembler::DivS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { } void Mips64Assembler::AddD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { - EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs), - static_cast<FpuRegister>(fd), 0x0); + EmitFR(0x11, 0x11, ft, fs, fd, 0x0); } void Mips64Assembler::SubD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { - EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs), - static_cast<FpuRegister>(fd), 0x1); + EmitFR(0x11, 0x11, ft, fs, fd, 0x1); } void Mips64Assembler::MulD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { - EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs), - static_cast<FpuRegister>(fd), 0x2); + EmitFR(0x11, 0x11, ft, fs, fd, 0x2); } void Mips64Assembler::DivD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { - EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs), - static_cast<FpuRegister>(fd), 0x3); + EmitFR(0x11, 0x11, ft, fs, fd, 0x3); } void Mips64Assembler::MovS(FpuRegister fd, FpuRegister fs) { @@ -392,16 +548,47 @@ void Mips64Assembler::MovS(FpuRegister fd, FpuRegister fs) { } void Mips64Assembler::MovD(FpuRegister fd, FpuRegister fs) { - EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), static_cast<FpuRegister>(fs), - static_cast<FpuRegister>(fd), 0x6); + EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x6); +} + +void Mips64Assembler::NegS(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x7); +} + +void Mips64Assembler::NegD(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x7); +} + +void Mips64Assembler::Cvtsw(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x14, static_cast<FpuRegister>(0), fs, fd, 0x20); +} + +void Mips64Assembler::Cvtdw(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x14, static_cast<FpuRegister>(0), fs, fd, 0x21); +} + +void Mips64Assembler::Cvtsd(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x20); +} + +void Mips64Assembler::Cvtds(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x21); } void Mips64Assembler::Mfc1(GpuRegister rt, FpuRegister fs) { EmitFR(0x11, 0x00, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0); } -void Mips64Assembler::Mtc1(FpuRegister ft, GpuRegister rs) { - EmitFR(0x11, 0x04, ft, static_cast<FpuRegister>(rs), static_cast<FpuRegister>(0), 0x0); +void Mips64Assembler::Mtc1(GpuRegister rt, FpuRegister fs) { + EmitFR(0x11, 0x04, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0); +} + +void Mips64Assembler::Dmfc1(GpuRegister rt, FpuRegister fs) { + EmitFR(0x11, 0x01, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0); +} + +void Mips64Assembler::Dmtc1(GpuRegister rt, FpuRegister fs) { + EmitFR(0x11, 0x05, static_cast<FpuRegister>(rt), fs, 
static_cast<FpuRegister>(0), 0x0); } void Mips64Assembler::Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16) { @@ -430,45 +617,330 @@ void Mips64Assembler::Nop() { static_cast<GpuRegister>(0), 0, 0x0); } -void Mips64Assembler::Move(GpuRegister rt, GpuRegister rs) { - EmitI(0x19, rs, rt, 0); // Daddiu +void Mips64Assembler::Move(GpuRegister rd, GpuRegister rs) { + Or(rd, rs, ZERO); } -void Mips64Assembler::Clear(GpuRegister rt) { - EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rt, 0, 0x20); +void Mips64Assembler::Clear(GpuRegister rd) { + Move(rd, ZERO); } -void Mips64Assembler::Not(GpuRegister rt, GpuRegister rs) { - EmitR(0, static_cast<GpuRegister>(0), rs, rt, 0, 0x27); +void Mips64Assembler::Not(GpuRegister rd, GpuRegister rs) { + Nor(rd, rs, ZERO); } -void Mips64Assembler::Mul(GpuRegister rd, GpuRegister rs, GpuRegister rt) { - Mult(rs, rt); - Mflo(rd); +void Mips64Assembler::LoadConst32(GpuRegister rd, int32_t value) { + if (IsUint<16>(value)) { + // Use OR with (unsigned) immediate to encode 16b unsigned int. + Ori(rd, ZERO, value); + } else if (IsInt<16>(value)) { + // Use ADD with (signed) immediate to encode 16b signed int. + Addiu(rd, ZERO, value); + } else { + Lui(rd, value >> 16); + if (value & 0xFFFF) + Ori(rd, rd, value); + } } -void Mips64Assembler::Div(GpuRegister rd, GpuRegister rs, GpuRegister rt) { - Div(rs, rt); - Mflo(rd); +void Mips64Assembler::LoadConst64(GpuRegister rd, int64_t value) { + int bit31 = (value & UINT64_C(0x80000000)) != 0; + + // Loads with 1 instruction. + if (IsUint<16>(value)) { + Ori(rd, ZERO, value); + } else if (IsInt<16>(value)) { + Daddiu(rd, ZERO, value); + } else if ((value & 0xFFFF) == 0 && IsInt<16>(value >> 16)) { + Lui(rd, value >> 16); + } else if (IsInt<32>(value)) { + // Loads with 2 instructions. + Lui(rd, value >> 16); + Ori(rd, rd, value); + } else if ((value & 0xFFFF0000) == 0 && IsInt<16>(value >> 32)) { + Ori(rd, ZERO, value); + Dahi(rd, value >> 32); + } else if ((value & UINT64_C(0xFFFFFFFF0000)) == 0) { + Ori(rd, ZERO, value); + Dati(rd, value >> 48); + } else if ((value & 0xFFFF) == 0 && + (-32768 - bit31) <= (value >> 32) && (value >> 32) <= (32767 - bit31)) { + Lui(rd, value >> 16); + Dahi(rd, (value >> 32) + bit31); + } else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) { + Lui(rd, value >> 16); + Dati(rd, (value >> 48) + bit31); + } else { + int shift_cnt = CTZ(value); + int64_t tmp = value >> shift_cnt; + if (IsUint<16>(tmp)) { + Ori(rd, ZERO, tmp); + if (shift_cnt < 32) + Dsll(rd, rd, shift_cnt); + else + Dsll32(rd, rd, shift_cnt & 31); + } else if (IsInt<16>(tmp)) { + Daddiu(rd, ZERO, tmp); + if (shift_cnt < 32) + Dsll(rd, rd, shift_cnt); + else + Dsll32(rd, rd, shift_cnt & 31); + } else if (IsInt<32>(tmp)) { + // Loads with 3 instructions. + Lui(rd, tmp >> 16); + Ori(rd, rd, tmp); + if (shift_cnt < 32) + Dsll(rd, rd, shift_cnt); + else + Dsll32(rd, rd, shift_cnt & 31); + } else { + shift_cnt = 16 + CTZ(value >> 16); + tmp = value >> shift_cnt; + if (IsUint<16>(tmp)) { + Ori(rd, ZERO, tmp); + if (shift_cnt < 32) + Dsll(rd, rd, shift_cnt); + else + Dsll32(rd, rd, shift_cnt & 31); + Ori(rd, rd, value); + } else if (IsInt<16>(tmp)) { + Daddiu(rd, ZERO, tmp); + if (shift_cnt < 32) + Dsll(rd, rd, shift_cnt); + else + Dsll32(rd, rd, shift_cnt & 31); + Ori(rd, rd, value); + } else { + // Loads with 3-4 instructions. 
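// (For clarity: this general path builds the low 32 bits with lui and/or ori,
// then patches bits 32-47 with dahi and bits 48-63 with dati. Because lui/ori
// leave a sign-extended 32-bit value and dahi itself sign-extends into the top
// 16 bits, a carry of 1 is added to the dahi/dati immediate whenever the bit
// just below the patched field is set.)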
+ uint64_t tmp2 = value; + bool used_lui = false; + if (((tmp2 >> 16) & 0xFFFF) != 0 || (tmp2 & 0xFFFFFFFF) == 0) { + Lui(rd, tmp2 >> 16); + used_lui = true; + } + if ((tmp2 & 0xFFFF) != 0) { + if (used_lui) + Ori(rd, rd, tmp2); + else + Ori(rd, ZERO, tmp2); + } + if (bit31) { + tmp2 += UINT64_C(0x100000000); + } + if (((tmp2 >> 32) & 0xFFFF) != 0) { + Dahi(rd, tmp2 >> 32); + } + if (tmp2 & UINT64_C(0x800000000000)) { + tmp2 += UINT64_C(0x1000000000000); + } + if ((tmp2 >> 48) != 0) { + Dati(rd, tmp2 >> 48); + } + } + } + } } -void Mips64Assembler::Rem(GpuRegister rd, GpuRegister rs, GpuRegister rt) { - Div(rs, rt); - Mfhi(rd); +void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp) { + if (IsInt<16>(value)) { + Addiu(rt, rs, value); + } else { + LoadConst32(rtmp, value); + Addu(rt, rs, rtmp); + } } -void Mips64Assembler::AddConstant64(GpuRegister rt, GpuRegister rs, int32_t value) { - CHECK((value >= -32768) && (value <= 32766)); - Daddiu(rt, rs, value); +void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) { + if (IsInt<16>(value)) { + Daddiu(rt, rs, value); + } else { + LoadConst64(rtmp, value); + Daddu(rt, rs, rtmp); + } } -void Mips64Assembler::LoadImmediate64(GpuRegister rt, int32_t value) { - CHECK((value >= -32768) && (value <= 32766)); - Daddiu(rt, ZERO, value); +// +// MIPS64R6 branches +// +// +// Unconditional (pc + 32-bit signed offset): +// +// auipc at, ofs_high +// jic at, ofs_low +// // no delay/forbidden slot +// +// +// Conditional (pc + 32-bit signed offset): +// +// b<cond>c reg, +2 // skip next 2 instructions +// auipc at, ofs_high +// jic at, ofs_low +// // no delay/forbidden slot +// +// +// Unconditional (pc + 32-bit signed offset) and link: +// +// auipc reg, ofs_high +// daddiu reg, ofs_low +// jialc reg, 0 +// // no delay/forbidden slot +// +// +// TODO: use shorter instruction sequences whenever possible. +// + +void Mips64Assembler::Bind(Label* label) { + CHECK(!label->IsBound()); + int32_t bound_pc = buffer_.Size(); + + // Walk the list of the branches (auipc + jic pairs) referring to and preceding this label. + // Embed the previously unknown pc-relative addresses in them. + while (label->IsLinked()) { + int32_t position = label->Position(); + // Extract the branch (instruction pair) + uint32_t auipc = buffer_.Load<uint32_t>(position); + uint32_t jic = buffer_.Load<uint32_t>(position + 4); // actually, jic or daddiu + + // Extract the location of the previous pair in the list (walking the list backwards; + // the previous pair location was stored in the immediate operands of the instructions) + int32_t prev = (auipc << 16) | (jic & 0xFFFF); + + // Get the pc-relative address + uint32_t offset = bound_pc - position; + offset += (offset & 0x8000) << 1; // account for sign extension in jic/daddiu + + // Embed it in the two instructions + auipc = (auipc & 0xFFFF0000) | (offset >> 16); + jic = (jic & 0xFFFF0000) | (offset & 0xFFFF); + + // Save the adjusted instructions + buffer_.Store<uint32_t>(position, auipc); + buffer_.Store<uint32_t>(position + 4, jic); + + // On to the previous branch in the list... 
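// (The immediates just overwritten held the location of the previous forward
// branch to this label, so storing 'prev' back into position_ moves the walk
// one node up the chain; the first branch emitted for the label carried the
// terminator value and ends the loop. Offset math used here and in B() below,
// by way of example: for a target 20 bytes ahead of the pair, offset = 0x14,
// giving "auipc at, 0x0000" and "jic at, 0x0014"; for a target 20 bytes behind,
// offset = 0xFFFFFFEC, the fix-up adds 0x10000, giving "auipc at, 0x0000" and
// "jic at, 0xFFEC", whose sign-extended low half restores the -20.)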
+ label->position_ = prev; + } + + // Now make the label object contain its own location + // (it will be used by the branches referring to and following this label) + label->BindTo(bound_pc); +} + +void Mips64Assembler::B(Label* label) { + if (label->IsBound()) { + // Branch backwards (to a preceding label), distance is known + uint32_t offset = label->Position() - buffer_.Size(); + CHECK_LE(static_cast<int32_t>(offset), 0); + offset += (offset & 0x8000) << 1; // account for sign extension in jic + Auipc(AT, offset >> 16); + Jic(AT, offset); + } else { + // Branch forward (to a following label), distance is unknown + int32_t position = buffer_.Size(); + // The first branch forward will have 0 in its pc-relative address (copied from label's + // position). It will be the terminator of the list of forward-reaching branches. + uint32_t prev = label->position_; + Auipc(AT, prev >> 16); + Jic(AT, prev); + // Now make the link object point to the location of this branch + // (this forms a linked list of branches preceding this label) + label->LinkTo(position); + } +} + +void Mips64Assembler::Jalr(Label* label, GpuRegister indirect_reg) { + if (label->IsBound()) { + // Branch backwards (to a preceding label), distance is known + uint32_t offset = label->Position() - buffer_.Size(); + CHECK_LE(static_cast<int32_t>(offset), 0); + offset += (offset & 0x8000) << 1; // account for sign extension in daddiu + Auipc(indirect_reg, offset >> 16); + Daddiu(indirect_reg, indirect_reg, offset); + Jialc(indirect_reg, 0); + } else { + // Branch forward (to a following label), distance is unknown + int32_t position = buffer_.Size(); + // The first branch forward will have 0 in its pc-relative address (copied from label's + // position). It will be the terminator of the list of forward-reaching branches. 
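// (Same linking scheme as in B() above, except that the low half of the link
// is carried in the daddiu immediate rather than in a jic immediate; Bind()
// patches the second instruction of the pair the same way in either case.)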
+ uint32_t prev = label->position_; + Auipc(indirect_reg, prev >> 16); + Daddiu(indirect_reg, indirect_reg, prev); + Jialc(indirect_reg, 0); + // Now make the link object point to the location of this branch + // (this forms a linked list of branches preceding this label) + label->LinkTo(position); + } +} + +void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Label* label) { + Bgec(rs, rt, 2); + B(label); +} + +void Mips64Assembler::Bltzc(GpuRegister rt, Label* label) { + Bgezc(rt, 2); + B(label); +} + +void Mips64Assembler::Bgtzc(GpuRegister rt, Label* label) { + Blezc(rt, 2); + B(label); +} + +void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Label* label) { + Bltc(rs, rt, 2); + B(label); +} + +void Mips64Assembler::Bgezc(GpuRegister rt, Label* label) { + Bltzc(rt, 2); + B(label); +} + +void Mips64Assembler::Blezc(GpuRegister rt, Label* label) { + Bgtzc(rt, 2); + B(label); +} + +void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Label* label) { + Bgeuc(rs, rt, 2); + B(label); +} + +void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Label* label) { + Bltuc(rs, rt, 2); + B(label); +} + +void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Label* label) { + Bnec(rs, rt, 2); + B(label); +} + +void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Label* label) { + Beqc(rs, rt, 2); + B(label); +} + +void Mips64Assembler::Beqzc(GpuRegister rs, Label* label) { + Bnezc(rs, 2); + B(label); +} + +void Mips64Assembler::Bnezc(GpuRegister rs, Label* label) { + Beqzc(rs, 2); + B(label); } void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset) { + if (!IsInt<16>(offset)) { + LoadConst32(AT, offset); + Daddu(AT, AT, base); + base = AT; + offset = 0; + } + switch (type) { case kLoadSignedByte: Lb(reg, base, offset); @@ -489,23 +961,25 @@ void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuR Lwu(reg, base, offset); break; case kLoadDoubleword: - // TODO: alignment issues ??? Ld(reg, base, offset); break; - default: - LOG(FATAL) << "UNREACHABLE"; } } void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, int32_t offset) { - CHECK((offset >= -32768) && (offset <= 32766)); + if (!IsInt<16>(offset)) { + LoadConst32(AT, offset); + Daddu(AT, AT, base); + base = AT; + offset = 0; + } + switch (type) { case kLoadWord: Lwc1(reg, base, offset); break; case kLoadDoubleword: - // TODO: alignment issues ??? Ldc1(reg, base, offset); break; default: @@ -542,6 +1016,13 @@ void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, int32_t offset) { + if (!IsInt<16>(offset)) { + LoadConst32(AT, offset); + Daddu(AT, AT, base); + base = AT; + offset = 0; + } + switch (type) { case kStoreByte: Sb(reg, base, offset); @@ -553,7 +1034,6 @@ void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuR Sw(reg, base, offset); break; case kStoreDoubleword: - // TODO: alignment issues ??? 
Sd(reg, base, offset); break; default: @@ -563,6 +1043,13 @@ void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuR void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base, int32_t offset) { + if (!IsInt<16>(offset)) { + LoadConst32(AT, offset); + Daddu(AT, AT, base); + base = AT; + offset = 0; + } + switch (type) { case kStoreWord: Swc1(reg, base, offset); break; @@ -613,10 +1100,12 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // only increment stack offset. offset += size; } else if (reg.IsFpuRegister()) { - StoreFpuToOffset((size == 4) ? kStoreWord : kStoreDoubleword, reg.AsFpuRegister(), SP, offset); + StoreFpuToOffset((size == 4) ? kStoreWord : kStoreDoubleword, + reg.AsFpuRegister(), SP, offset); offset += size; } else if (reg.IsGpuRegister()) { - StoreToOffset((size == 4) ? kStoreWord : kStoreDoubleword, reg.AsGpuRegister(), SP, offset); + StoreToOffset((size == 4) ? kStoreWord : kStoreDoubleword, + reg.AsGpuRegister(), SP, offset); offset += size; } } @@ -650,14 +1139,14 @@ } void Mips64Assembler::IncreaseFrameSize(size_t adjust) { - CHECK_ALIGNED(adjust, kStackAlignment); - AddConstant64(SP, SP, -adjust); + CHECK_ALIGNED(adjust, kFramePointerSize); + Daddiu64(SP, SP, static_cast<int32_t>(-adjust)); cfi_.AdjustCFAOffset(adjust); } void Mips64Assembler::DecreaseFrameSize(size_t adjust) { - CHECK_ALIGNED(adjust, kStackAlignment); - AddConstant64(SP, SP, adjust); + CHECK_ALIGNED(adjust, kFramePointerSize); + Daddiu64(SP, SP, static_cast<int32_t>(adjust)); cfi_.AdjustCFAOffset(-adjust); } @@ -702,7 +1191,7 @@ void Mips64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; - LoadImmediate64(scratch.AsGpuRegister(), imm); + LoadConst32(scratch.AsGpuRegister(), imm); StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value()); } @@ -710,7 +1199,9 @@ void Mips64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t im ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; - LoadImmediate64(scratch.AsGpuRegister(), imm); + // TODO: it's unclear whether 32 or 64 bits need to be stored (Arm64 and x86/x64 disagree?). + // Is this function even referenced anywhere else in the code?
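// (As written, LoadConst32 below leaves the 32-bit immediate sign-extended to
// 64 bits in the scratch register, and the kStoreDoubleword store then writes
// that full doubleword.)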
+ LoadConst32(scratch.AsGpuRegister(), imm); StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, dest.Int32Value()); } @@ -719,7 +1210,7 @@ void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; - AddConstant64(scratch.AsGpuRegister(), SP, fr_offs.Int32Value()); + Daddiu64(scratch.AsGpuRegister(), SP, fr_offs.Int32Value()); StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value()); } @@ -757,20 +1248,24 @@ void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, Membe LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(), base.AsMips64().AsGpuRegister(), offs.Int32Value()); if (kPoisonHeapReferences && poison_reference) { - Subu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister()); + // TODO: review + // Negate the 32-bit ref + Dsubu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister()); + // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64 + Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 31); } } void Mips64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, - Offset offs) { + Offset offs) { Mips64ManagedRegister dest = mdest.AsMips64(); - CHECK(dest.IsGpuRegister() && dest.IsGpuRegister()) << dest; + CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister()); LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), base.AsMips64().AsGpuRegister(), offs.Int32Value()); } void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, - ThreadOffset<8> offs) { + ThreadOffset<8> offs) { Mips64ManagedRegister dest = mdest.AsMips64(); CHECK(dest.IsGpuRegister()); LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value()); @@ -849,7 +1344,7 @@ void Mips64Assembler::Copy(FrameOffset dest, FrameOffset src, } void Mips64Assembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, - ManagedRegister mscratch, size_t size) { + ManagedRegister mscratch, size_t size) { GpuRegister scratch = mscratch.AsMips64().AsGpuRegister(); CHECK(size == 4 || size == 8) << size; if (size == 4) { @@ -866,7 +1361,7 @@ void Mips64Assembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset sr } void Mips64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, - ManagedRegister mscratch, size_t size) { + ManagedRegister mscratch, size_t size) { GpuRegister scratch = mscratch.AsMips64().AsGpuRegister(); CHECK(size == 4 || size == 8) << size; if (size == 4) { @@ -888,8 +1383,8 @@ void Mips64Assembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offse } void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset, - ManagedRegister src, Offset src_offset, - ManagedRegister mscratch, size_t size) { + ManagedRegister src, Offset src_offset, + ManagedRegister mscratch, size_t size) { GpuRegister scratch = mscratch.AsMips64().AsGpuRegister(); CHECK(size == 4 || size == 8) << size; if (size == 4) { @@ -912,12 +1407,14 @@ void Mips64Assembler::Copy(FrameOffset /*dest*/, Offset /*dest_offset*/, FrameOf } void Mips64Assembler::MemoryBarrier(ManagedRegister) { + // TODO: sync? 
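// (Presumably the Sync() instruction added above, e.g. sync 0 as a full
// ordering barrier, would be the natural implementation here once this TODO
// is resolved.)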
UNIMPLEMENTED(FATAL) << "no mips64 implementation"; } void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, - FrameOffset handle_scope_offset, - ManagedRegister min_reg, bool null_allowed) { + FrameOffset handle_scope_offset, + ManagedRegister min_reg, + bool null_allowed) { Mips64ManagedRegister out_reg = mout_reg.AsMips64(); Mips64ManagedRegister in_reg = min_reg.AsMips64(); CHECK(in_reg.IsNoRegister() || in_reg.IsGpuRegister()) << in_reg; @@ -933,20 +1430,20 @@ void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, in_reg = out_reg; } if (!out_reg.Equals(in_reg)) { - LoadImmediate64(out_reg.AsGpuRegister(), 0); + LoadConst32(out_reg.AsGpuRegister(), 0); } - EmitBranch(in_reg.AsGpuRegister(), ZERO, &null_arg, true); - AddConstant64(out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); - Bind(&null_arg, false); + Beqzc(in_reg.AsGpuRegister(), &null_arg); + Daddiu64(out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); + Bind(&null_arg); } else { - AddConstant64(out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); + Daddiu64(out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); } } void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off, - FrameOffset handle_scope_offset, - ManagedRegister mscratch, - bool null_allowed) { + FrameOffset handle_scope_offset, + ManagedRegister mscratch, + bool null_allowed) { Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; if (null_allowed) { @@ -956,30 +1453,30 @@ void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off, // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is // the address in the handle scope holding the reference. // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset) - EmitBranch(scratch.AsGpuRegister(), ZERO, &null_arg, true); - AddConstant64(scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); - Bind(&null_arg, false); + Beqzc(scratch.AsGpuRegister(), &null_arg); + Daddiu64(scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); + Bind(&null_arg); } else { - AddConstant64(scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); + Daddiu64(scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); } StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, out_off.Int32Value()); } // Given a handle scope entry, load the associated reference. 
void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, - ManagedRegister min_reg) { + ManagedRegister min_reg) { Mips64ManagedRegister out_reg = mout_reg.AsMips64(); Mips64ManagedRegister in_reg = min_reg.AsMips64(); CHECK(out_reg.IsGpuRegister()) << out_reg; CHECK(in_reg.IsGpuRegister()) << in_reg; Label null_arg; if (!out_reg.Equals(in_reg)) { - LoadImmediate64(out_reg.AsGpuRegister(), 0); + LoadConst32(out_reg.AsGpuRegister(), 0); } - EmitBranch(in_reg.AsGpuRegister(), ZERO, &null_arg, true); + Beqzc(in_reg.AsGpuRegister(), &null_arg); LoadFromOffset(kLoadDoubleword, out_reg.AsGpuRegister(), in_reg.AsGpuRegister(), 0); - Bind(&null_arg, false); + Bind(&null_arg); } void Mips64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { @@ -1022,7 +1519,7 @@ void Mips64Assembler::GetCurrentThread(ManagedRegister tr) { } void Mips64Assembler::GetCurrentThread(FrameOffset offset, - ManagedRegister /*mscratch*/) { + ManagedRegister /*mscratch*/) { StoreToOffset(kStoreDoubleword, S1, SP, offset.Int32Value()); } @@ -1032,13 +1529,13 @@ void Mips64Assembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjus buffer_.EnqueueSlowPath(slow); LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), S1, Thread::ExceptionOffset<8>().Int32Value()); - EmitBranch(scratch.AsGpuRegister(), ZERO, slow->Entry(), false); + Bnezc(scratch.AsGpuRegister(), slow->Entry()); } void Mips64ExceptionSlowPath::Emit(Assembler* sasm) { Mips64Assembler* sp_asm = down_cast<Mips64Assembler*>(sasm); #define __ sp_asm-> - __ Bind(&entry_, false); + __ Bind(&entry_); if (stack_adjust_ != 0) { // Fix up the frame. __ DecreaseFrameSize(stack_adjust_); } @@ -1048,6 +1545,7 @@ void Mips64ExceptionSlowPath::Emit(Assembler* sasm) { // Set up call to Thread::Current()->pDeliverException __ LoadFromOffset(kLoadDoubleword, T9, S1, QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()); + // TODO: check T9 usage __ Jr(T9); // Call never returns __ Break(); diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 38419abbac..88cc4bcd00 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -56,13 +56,31 @@ class Mips64Assembler FINAL : public Assembler { void Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt); void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16); - void Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16); + void Daddu(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 + void Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16); // MIPS64 void Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt); void Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt); - void Mult(GpuRegister rs, GpuRegister rt); - void Multu(GpuRegister rs, GpuRegister rt); - void Div(GpuRegister rs, GpuRegister rt); - void Divu(GpuRegister rs, GpuRegister rt); + void Dsubu(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 + + void MultR2(GpuRegister rs, GpuRegister rt); // R2 + void MultuR2(GpuRegister rs, GpuRegister rt); // R2 + void DivR2(GpuRegister rs, GpuRegister rt); // R2 + void DivuR2(GpuRegister rs, GpuRegister rt); // R2 + void MulR2(GpuRegister rd, GpuRegister rs, GpuRegister rt); // R2 + void DivR2(GpuRegister rd, GpuRegister rs, GpuRegister rt); // R2 + void ModR2(GpuRegister rd, GpuRegister rs, GpuRegister rt); // R2 + void DivuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt); // R2 + void 
ModuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt); // R2 + void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt); // R6 + void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt); // R6 + void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt); // R6 + void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt); // R6 + void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt); // R6 + void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 R6 + void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 R6 + void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 R6 + void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 R6 + void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 R6 void And(GpuRegister rd, GpuRegister rs, GpuRegister rt); void Andi(GpuRegister rt, GpuRegister rs, uint16_t imm16); @@ -72,40 +90,72 @@ class Mips64Assembler FINAL : public Assembler { void Xori(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt); - void Sll(GpuRegister rd, GpuRegister rs, int shamt); - void Srl(GpuRegister rd, GpuRegister rs, int shamt); - void Sra(GpuRegister rd, GpuRegister rs, int shamt); - void Sllv(GpuRegister rd, GpuRegister rs, GpuRegister rt); - void Srlv(GpuRegister rd, GpuRegister rs, GpuRegister rt); - void Srav(GpuRegister rd, GpuRegister rs, GpuRegister rt); + void Seb(GpuRegister rd, GpuRegister rt); // R2+ + void Seh(GpuRegister rd, GpuRegister rt); // R2+ + void Dext(GpuRegister rs, GpuRegister rt, int pos, int size_less_one); // MIPS64 + + void Sll(GpuRegister rd, GpuRegister rt, int shamt); + void Srl(GpuRegister rd, GpuRegister rt, int shamt); + void Sra(GpuRegister rd, GpuRegister rt, int shamt); + void Sllv(GpuRegister rd, GpuRegister rt, GpuRegister rs); + void Srlv(GpuRegister rd, GpuRegister rt, GpuRegister rs); + void Srav(GpuRegister rd, GpuRegister rt, GpuRegister rs); + void Dsll(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 + void Dsrl(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 + void Dsra(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 + void Dsll32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 + void Dsrl32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 + void Dsra32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 + void Dsllv(GpuRegister rd, GpuRegister rt, GpuRegister rs); // MIPS64 + void Dsrlv(GpuRegister rd, GpuRegister rt, GpuRegister rs); // MIPS64 + void Dsrav(GpuRegister rd, GpuRegister rt, GpuRegister rs); // MIPS64 void Lb(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lh(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lw(GpuRegister rt, GpuRegister rs, uint16_t imm16); - void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16); + void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16); // MIPS64 void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16); - void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16); + void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16); // MIPS64 void Lui(GpuRegister rt, uint16_t imm16); - void Mfhi(GpuRegister rd); - void Mflo(GpuRegister rd); + void Dahi(GpuRegister rs, uint16_t imm16); // MIPS64 R6 + void Dati(GpuRegister rs, uint16_t imm16); // MIPS64 R6 + void Sync(uint32_t stype); + void Mfhi(GpuRegister rd); // R2 + void Mflo(GpuRegister rd); // R2 void Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Sh(GpuRegister rt, GpuRegister rs, uint16_t imm16); 
void Sw(GpuRegister rt, GpuRegister rs, uint16_t imm16); - void Sd(GpuRegister rt, GpuRegister rs, uint16_t imm16); + void Sd(GpuRegister rt, GpuRegister rs, uint16_t imm16); // MIPS64 void Slt(GpuRegister rd, GpuRegister rs, GpuRegister rt); void Sltu(GpuRegister rd, GpuRegister rs, GpuRegister rt); void Slti(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16); - void Beq(GpuRegister rt, GpuRegister rs, uint16_t imm16); - void Bne(GpuRegister rt, GpuRegister rs, uint16_t imm16); - void J(uint32_t address); - void Jal(uint32_t address); - void Jr(GpuRegister rs); + void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16); + void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16); + void J(uint32_t addr26); + void Jal(uint32_t addr26); + void Jalr(GpuRegister rd, GpuRegister rs); void Jalr(GpuRegister rs); + void Jr(GpuRegister rs); + void Auipc(GpuRegister rs, uint16_t imm16); // R6 + void Jic(GpuRegister rt, uint16_t imm16); // R6 + void Jialc(GpuRegister rt, uint16_t imm16); // R6 + void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16); // R6 + void Bltzc(GpuRegister rt, uint16_t imm16); // R6 + void Bgtzc(GpuRegister rt, uint16_t imm16); // R6 + void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16); // R6 + void Bgezc(GpuRegister rt, uint16_t imm16); // R6 + void Blezc(GpuRegister rt, uint16_t imm16); // R6 + void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16); // R6 + void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16); // R6 + void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16); // R6 + void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16); // R6 + void Beqzc(GpuRegister rs, uint32_t imm21); // R6 + void Bnezc(GpuRegister rs, uint32_t imm21); // R6 void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft); void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft); @@ -117,9 +167,18 @@ class Mips64Assembler FINAL : public Assembler { void DivD(FpuRegister fd, FpuRegister fs, FpuRegister ft); void MovS(FpuRegister fd, FpuRegister fs); void MovD(FpuRegister fd, FpuRegister fs); + void NegS(FpuRegister fd, FpuRegister fs); + void NegD(FpuRegister fd, FpuRegister fs); + + void Cvtsw(FpuRegister fd, FpuRegister fs); + void Cvtdw(FpuRegister fd, FpuRegister fs); + void Cvtsd(FpuRegister fd, FpuRegister fs); + void Cvtds(FpuRegister fd, FpuRegister fs); void Mfc1(GpuRegister rt, FpuRegister fs); - void Mtc1(FpuRegister ft, GpuRegister rs); + void Mtc1(GpuRegister rt, FpuRegister fs); + void Dmfc1(GpuRegister rt, FpuRegister fs); // MIPS64 + void Dmtc1(GpuRegister rt, FpuRegister fs); // MIPS64 void Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16); void Ldc1(FpuRegister ft, GpuRegister rs, uint16_t imm16); void Swc1(FpuRegister ft, GpuRegister rs, uint16_t imm16); @@ -127,15 +186,33 @@ class Mips64Assembler FINAL : public Assembler { void Break(); void Nop(); - void Move(GpuRegister rt, GpuRegister rs); - void Clear(GpuRegister rt); - void Not(GpuRegister rt, GpuRegister rs); - void Mul(GpuRegister rd, GpuRegister rs, GpuRegister rt); - void Div(GpuRegister rd, GpuRegister rs, GpuRegister rt); - void Rem(GpuRegister rd, GpuRegister rs, GpuRegister rt); - - void AddConstant64(GpuRegister rt, GpuRegister rs, int32_t value); - void LoadImmediate64(GpuRegister rt, int32_t value); + void Move(GpuRegister rd, GpuRegister rs); + void Clear(GpuRegister rd); + void Not(GpuRegister rd, GpuRegister rs); + + // Higher level composite instructions + void LoadConst32(GpuRegister rd, int32_t value); + void 
LoadConst64(GpuRegister rd, int64_t value); // MIPS64 + + void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT); + void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT); // MIPS64 + + void Bind(Label* label); // R6 + void B(Label* label); // R6 + void Jalr(Label* label, GpuRegister indirect_reg = RA); // R6 + // TODO: implement common for R6 and non-R6 interface for conditional branches? + void Bltc(GpuRegister rs, GpuRegister rt, Label* label); // R6 + void Bltzc(GpuRegister rt, Label* label); // R6 + void Bgtzc(GpuRegister rt, Label* label); // R6 + void Bgec(GpuRegister rs, GpuRegister rt, Label* label); // R6 + void Bgezc(GpuRegister rt, Label* label); // R6 + void Blezc(GpuRegister rt, Label* label); // R6 + void Bltuc(GpuRegister rs, GpuRegister rt, Label* label); // R6 + void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label); // R6 + void Beqc(GpuRegister rs, GpuRegister rt, Label* label); // R6 + void Bnec(GpuRegister rs, GpuRegister rt, Label* label); // R6 + void Beqzc(GpuRegister rs, Label* label); // R6 + void Bnezc(GpuRegister rs, Label* label); // R6 void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size); void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); @@ -144,10 +221,7 @@ class Mips64Assembler FINAL : public Assembler { void StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base, int32_t offset); // Emit data (e.g. encoded instruction or immediate) to the instruction stream. - void Emit(int32_t value); - void EmitBranch(GpuRegister rt, GpuRegister rs, Label* label, bool equal); - void EmitJump(Label* label, bool link); - void Bind(Label* label, bool is_jump); + void Emit(uint32_t value); // // Overridden common assembler high-level functionality @@ -269,13 +343,11 @@ class Mips64Assembler FINAL : public Assembler { private: void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct); void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm); - void EmitJ(int opcode, int address); + void EmitI21(int opcode, GpuRegister rs, uint32_t imm21); + void EmitJ(int opcode, uint32_t addr26); void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct); void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm); - int32_t EncodeBranchOffset(int offset, int32_t inst, bool is_jump); - int DecodeBranchOffset(int32_t inst, bool is_jump); - DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); }; diff --git a/compiler/utils/mips64/constants_mips64.h b/compiler/utils/mips64/constants_mips64.h index 8b7697cac3..f57498d34f 100644 --- a/compiler/utils/mips64/constants_mips64.h +++ b/compiler/utils/mips64/constants_mips64.h @@ -67,7 +67,7 @@ class Instr { static const uint32_t kBreakPointInstruction = 0x0000000D; bool IsBreakPoint() { - return ((*reinterpret_cast<const uint32_t*>(this)) & 0xFC0000CF) == kBreakPointInstruction; + return ((*reinterpret_cast<const uint32_t*>(this)) & 0xFC00003F) == kBreakPointInstruction; } // Instructions are read out of a code stream. 
The only way to get a diff --git a/disassembler/disassembler.cc b/disassembler/disassembler.cc index 6334717fe5..e604c1f629 100644 --- a/disassembler/disassembler.cc +++ b/disassembler/disassembler.cc @@ -55,4 +55,8 @@ std::string Disassembler::FormatInstructionPointer(const uint8_t* begin) { } } +Disassembler* create_disassembler(InstructionSet instruction_set, DisassemblerOptions* options) { + return Disassembler::Create(instruction_set, options); +} + } // namespace art diff --git a/disassembler/disassembler.h b/disassembler/disassembler.h index 966ee3ac89..b99e5c2df4 100644 --- a/disassembler/disassembler.h +++ b/disassembler/disassembler.h @@ -63,6 +63,10 @@ class Disassembler { // Dump instructions within a range. virtual void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) = 0; + const DisassemblerOptions* GetDisassemblerOptions() const { + return disassembler_options_; + } + protected: explicit Disassembler(DisassemblerOptions* disassembler_options) : disassembler_options_(disassembler_options) { @@ -80,6 +84,9 @@ static inline bool HasBitSet(uint32_t value, uint32_t bit) { return (value & (1 << bit)) != 0; } +extern "C" +Disassembler* create_disassembler(InstructionSet instruction_set, DisassemblerOptions* options); + } // namespace art #endif // ART_DISASSEMBLER_DISASSEMBLER_H_ diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc index ac8173773d..70ca88dc55 100644 --- a/disassembler/disassembler_mips.cc +++ b/disassembler/disassembler_mips.cc @@ -106,6 +106,9 @@ static const MipsInstruction gMipsInstructions[] = { { kRTypeMask | (0x1f << 21), 62 | (1 << 21), "drotr32", "DTA", }, { kRTypeMask, 62, "dsrl32", "DTA", }, { kRTypeMask, 63, "dsra32", "DTA", }, + { kRTypeMask, (31u << kOpcodeShift) | 3, "dext", "TSAZ", }, + { kRTypeMask | (0x1f << 21) | (0x1f << 6), (31u << 26) | (16 << 6) | 32, "seb", "DT", }, + { kRTypeMask | (0x1f << 21) | (0x1f << 6), (31u << 26) | (24 << 6) | 32, "seh", "DT", }, // SPECIAL0 { kSpecial0Mask | 0x7ff, (2 << 6) | 24, "mul", "DST" }, @@ -150,13 +153,17 @@ static const MipsInstruction gMipsInstructions[] = { { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (16 << 16), "bltzal", "SB" }, { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (18 << 16), "bltzall", "SB" }, { kITypeMask | (0x1f << 16), 6 << kOpcodeShift | (0 << 16), "blez", "SB" }, + { kITypeMask, 6 << kOpcodeShift, "bgeuc", "STB" }, { kITypeMask | (0x1f << 16), 7 << kOpcodeShift | (0 << 16), "bgtz", "SB" }, + { kITypeMask, 7 << kOpcodeShift, "bltuc", "STB" }, { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (6 << 16), "dahi", "Si", }, { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (30 << 16), "dati", "Si", }, { 0xffff0000, (4 << kOpcodeShift), "b", "B" }, { 0xffff0000, (1 << kOpcodeShift) | (17 << 16), "bal", "B" }, + { kITypeMask, 8 << kOpcodeShift, "beqc", "STB" }, + { kITypeMask, 8 << kOpcodeShift, "addi", "TSi", }, { kITypeMask, 9 << kOpcodeShift, "addiu", "TSi", }, { kITypeMask, 10 << kOpcodeShift, "slti", "TSi", }, @@ -166,6 +173,83 @@ static const MipsInstruction gMipsInstructions[] = { { kITypeMask, 14 << kOpcodeShift, "xori", "TSi", }, { kITypeMask | (0x1f << 21), 15 << kOpcodeShift, "lui", "TI", }, { kITypeMask, 15 << kOpcodeShift, "aui", "TSI", }, + + { kITypeMask | (0x1f << 21), 22 << kOpcodeShift, "blezc", "TB" }, + + // TODO: de-dup + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (1 << 21) | (1 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (2 << 21) | (2 << 16), "bgezc", "TB" }, + { kITypeMask 
| (0x3ff << 16), (22 << kOpcodeShift) | (3 << 21) | (3 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (4 << 21) | (4 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (5 << 21) | (5 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (6 << 21) | (6 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (7 << 21) | (7 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (8 << 21) | (8 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (9 << 21) | (9 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (10 << 21) | (10 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (11 << 21) | (11 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (12 << 21) | (12 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (13 << 21) | (13 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (14 << 21) | (14 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (15 << 21) | (15 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (16 << 21) | (16 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (17 << 21) | (17 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (18 << 21) | (18 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (19 << 21) | (19 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (20 << 21) | (20 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (21 << 21) | (21 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (22 << 21) | (22 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (23 << 21) | (23 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (24 << 21) | (24 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (25 << 21) | (25 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (26 << 21) | (26 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (27 << 21) | (27 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (28 << 21) | (28 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (29 << 21) | (29 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (30 << 21) | (30 << 16), "bgezc", "TB" }, + { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (31 << 21) | (31 << 16), "bgezc", "TB" }, + + { kITypeMask, 22 << kOpcodeShift, "bgec", "STB" }, + + { kITypeMask | (0x1f << 21), 23 << kOpcodeShift, "bgtzc", "TB" }, + + // TODO: de-dup + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (1 << 21) | (1 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (2 << 21) | (2 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (3 << 21) | (3 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (4 << 21) | (4 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (5 << 21) | (5 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (6 << 21) | (6 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | 
(7 << 21) | (7 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (8 << 21) | (8 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (9 << 21) | (9 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (10 << 21) | (10 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (11 << 21) | (11 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (12 << 21) | (12 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (13 << 21) | (13 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (14 << 21) | (14 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (15 << 21) | (15 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (16 << 21) | (16 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (17 << 21) | (17 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (18 << 21) | (18 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (19 << 21) | (19 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (20 << 21) | (20 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (21 << 21) | (21 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (22 << 21) | (22 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (23 << 21) | (23 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (24 << 21) | (24 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (25 << 21) | (25 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (26 << 21) | (26 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (27 << 21) | (27 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (28 << 21) | (28 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (29 << 21) | (29 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (30 << 21) | (30 << 16), "bltzc", "TB" }, + { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (31 << 21) | (31 << 16), "bltzc", "TB" }, + + { kITypeMask, 23 << kOpcodeShift, "bltc", "STB" }, + + { kITypeMask, 24 << kOpcodeShift, "bnec", "STB" }, + { kITypeMask, 25 << kOpcodeShift, "daddiu", "TSi", }, { kITypeMask, 29 << kOpcodeShift, "daui", "TSi", }, @@ -180,9 +264,22 @@ static const MipsInstruction gMipsInstructions[] = { { kITypeMask, 43u << kOpcodeShift, "sw", "TO", }, { kITypeMask, 49u << kOpcodeShift, "lwc1", "tO", }, { kITypeMask, 53u << kOpcodeShift, "ldc1", "tO", }, + { kITypeMask | (0x1f << 21), 54u << kOpcodeShift, "jic", "Ti" }, + { kITypeMask | (1 << 21), (54u << kOpcodeShift) | (1 << 21), "beqzc", "Sb" }, // TODO: de-dup? 
+ { kITypeMask | (1 << 22), (54u << kOpcodeShift) | (1 << 22), "beqzc", "Sb" }, + { kITypeMask | (1 << 23), (54u << kOpcodeShift) | (1 << 23), "beqzc", "Sb" }, + { kITypeMask | (1 << 24), (54u << kOpcodeShift) | (1 << 24), "beqzc", "Sb" }, + { kITypeMask | (1 << 25), (54u << kOpcodeShift) | (1 << 25), "beqzc", "Sb" }, { kITypeMask, 55u << kOpcodeShift, "ld", "TO", }, { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", }, + { kITypeMask | (0x1f << 16), (59u << kOpcodeShift) | (30 << 16), "auipc", "Si" }, { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", }, + { kITypeMask | (0x1f << 21), 62u << kOpcodeShift, "jialc", "Ti" }, + { kITypeMask | (1 << 21), (62u << kOpcodeShift) | (1 << 21), "bnezc", "Sb" }, // TODO: de-dup? + { kITypeMask | (1 << 22), (62u << kOpcodeShift) | (1 << 22), "bnezc", "Sb" }, + { kITypeMask | (1 << 23), (62u << kOpcodeShift) | (1 << 23), "bnezc", "Sb" }, + { kITypeMask | (1 << 24), (62u << kOpcodeShift) | (1 << 24), "bnezc", "Sb" }, + { kITypeMask | (1 << 25), (62u << kOpcodeShift) | (1 << 25), "bnezc", "Sb" }, { kITypeMask, 63u << kOpcodeShift, "sd", "TO", }, // Floating point. @@ -241,7 +338,7 @@ size_t DisassemblerMips::Dump(std::ostream& os, const uint8_t* instr_ptr) { opcode = gMipsInstructions[i].name; for (const char* args_fmt = gMipsInstructions[i].args_fmt; *args_fmt; ++args_fmt) { switch (*args_fmt) { - case 'A': // sa (shift amount). + case 'A': // sa (shift amount or [d]ext position). args << sa; break; case 'B': // Branch offset. @@ -253,6 +350,15 @@ size_t DisassemblerMips::Dump(std::ostream& os, const uint8_t* instr_ptr) { << StringPrintf(" ; %+d", offset); } break; + case 'b': // 21-bit branch offset. + { + int32_t offset = (instruction & 0x1fffff) - ((instruction & 0x100000) << 1); + offset <<= 2; + offset += 4; // Delay slot. + args << FormatInstructionPointer(instr_ptr + offset) + << StringPrintf(" ; %+d", offset); + } + break; case 'D': args << 'r' << rd; break; case 'd': args << 'f' << rd; break; case 'a': args << 'f' << sa; break; @@ -302,6 +408,7 @@ size_t DisassemblerMips::Dump(std::ostream& os, const uint8_t* instr_ptr) { case 's': args << 'f' << rs; break; case 'T': args << 'r' << rt; break; case 't': args << 'f' << rt; break; + case 'Z': args << rd; break; // sz ([d]ext size). 
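// ('Z' prints the rd field as a plain number; for dext that field encodes
// size-1, so the printed operand is one less than the width of the extracted
// bit field.)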
} if (*(args_fmt + 1)) { args << ", "; @@ -311,9 +418,36 @@ size_t DisassemblerMips::Dump(std::ostream& os, const uint8_t* instr_ptr) { } } + // Special cases for sequences of: + // pc-relative +/- 2GB branch: + // auipc reg, imm + // jic reg, imm + // pc-relative +/- 2GB branch and link: + // auipc reg, imm + // daddiu reg, reg, imm + // jialc reg, 0 + if (((op == 0x36 && rs == 0 && rt != 0) || // jic + (op == 0x19 && rs == rt && rt != 0)) && // daddiu + last_ptr_ && (intptr_t)instr_ptr - (intptr_t)last_ptr_ == 4 && + (last_instr_ & 0xFC1F0000) == 0xEC1E0000 && // auipc + ((last_instr_ >> 21) & 0x1F) == rt) { + uint32_t offset = (last_instr_ << 16) | (instruction & 0xFFFF); + offset -= (offset & 0x8000) << 1; + offset -= 4; + if (op == 0x36) { + args << " ; b "; + } else { + args << " ; move r" << rt << ", "; + } + args << FormatInstructionPointer(instr_ptr + (int32_t)offset); + args << StringPrintf(" ; %+d", (int32_t)offset); + } + os << FormatInstructionPointer(instr_ptr) << StringPrintf(": %08x\t%-7s ", instruction, opcode.c_str()) << args.str() << '\n'; + last_ptr_ = instr_ptr; + last_instr_ = instruction; return 4; } diff --git a/disassembler/disassembler_mips.h b/disassembler/disassembler_mips.h index 67c3fcb71f..4f70a9b523 100644 --- a/disassembler/disassembler_mips.h +++ b/disassembler/disassembler_mips.h @@ -27,7 +27,9 @@ namespace mips { class DisassemblerMips FINAL : public Disassembler { public: explicit DisassemblerMips(DisassemblerOptions* options, bool is64bit) : Disassembler(options), - is64bit_(is64bit) {} + is64bit_(is64bit), + last_ptr_(nullptr), + last_instr_(0) {} size_t Dump(std::ostream& os, const uint8_t* begin) OVERRIDE; void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE; @@ -35,6 +37,11 @@ class DisassemblerMips FINAL : public Disassembler { private: const bool is64bit_; + // Address and encoding of the last disassembled instruction. + // Needed to produce more readable disassembly of certain 2-instruction sequences. 
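// (Specifically the auipc+jic and auipc+daddiu+jialc pairs emitted for long
// pc-relative branches; see the sequence special-casing in Dump() above.)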
+ const uint8_t* last_ptr_; + uint32_t last_instr_; + DISALLOW_COPY_AND_ASSIGN(DisassemblerMips); }; diff --git a/oatdump/Android.mk b/oatdump/Android.mk index f01afc541d..a3ef38db85 100644 --- a/oatdump/Android.mk +++ b/oatdump/Android.mk @@ -47,12 +47,28 @@ dump-oat-core-host: $(HOST_CORE_IMG_OUTS) $(OATDUMP) @echo Output in $(ART_DUMP_OAT_PATH)/core.host.oatdump.txt endif -.PHONY: dump-oat-core-target +.PHONY: dump-oat-core-target-$(TARGET_ARCH) +ifeq ($(ART_BUILD_TARGET),true) +dump-oat-core-target-$(TARGET_ARCH): $(TARGET_CORE_IMAGE_default_no-pic_$(ART_PHONY_TEST_TARGET_SUFFIX)) $(OATDUMP) + $(OATDUMP) --image=$(TARGET_CORE_IMG_LOCATION) \ + --output=$(ART_DUMP_OAT_PATH)/core.target.$(TARGET_ARCH).oatdump.txt --instruction-set=$(TARGET_ARCH) + @echo Output in $(ART_DUMP_OAT_PATH)/core.target.$(TARGET_ARCH).oatdump.txt +endif + +ifdef TARGET_2ND_ARCH +.PHONY: dump-oat-core-target-$(TARGET_2ND_ARCH) ifeq ($(ART_BUILD_TARGET),true) -dump-oat-core-target: $(TARGET_CORE_IMAGE_default_no-pic_32) $(OATDUMP) +dump-oat-core-target-$(TARGET_2ND_ARCH): $(TARGET_CORE_IMAGE_default_no-pic_$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)) $(OATDUMP) $(OATDUMP) --image=$(TARGET_CORE_IMG_LOCATION) \ - --output=$(ART_DUMP_OAT_PATH)/core.target.oatdump.txt --instruction-set=$(TARGET_ARCH) - @echo Output in $(ART_DUMP_OAT_PATH)/core.target.oatdump.txt + --output=$(ART_DUMP_OAT_PATH)/core.target.$(TARGET_2ND_ARCH).oatdump.txt --instruction-set=$(TARGET_2ND_ARCH) + @echo Output in $(ART_DUMP_OAT_PATH)/core.target.$(TARGET_2ND_ARCH).oatdump.txt +endif +endif + +.PHONY: dump-oat-core-target +dump-oat-core-target: dump-oat-core-target-$(TARGET_ARCH) +ifdef TARGET_2ND_ARCH +dump-oat-core-target: dump-oat-core-target-$(TARGET_2ND_ARCH) endif .PHONY: dump-oat-boot-$(TARGET_ARCH) diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index 9e9dea64c6..b3801b351f 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -355,7 +355,7 @@ class OatDumper { disassembler_(Disassembler::Create(instruction_set_, new DisassemblerOptions(options_.absolute_addresses_, oat_file.Begin(), - true /* can_read_litals_ */))) { + true /* can_read_literals_ */))) { CHECK(options_.class_loader_ != nullptr); CHECK(options_.class_filter_ != nullptr); CHECK(options_.method_filter_ != nullptr); diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc index 90b0d5374c..d81e0a9b96 100644 --- a/runtime/arch/arm/fault_handler_arm.cc +++ b/runtime/arch/arm/fault_handler_arm.cc @@ -70,7 +70,7 @@ void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo ATTRIBUTE_UNUSED struct ucontext* uc = reinterpret_cast<struct ucontext*>(context); struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext); *out_sp = static_cast<uintptr_t>(sc->arm_sp); - VLOG(signals) << "sp: " << *out_sp; + VLOG(signals) << "sp: " << std::hex << *out_sp; if (*out_sp == 0) { return; } diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 44b67ca61b..bee379eae2 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1400,8 +1400,9 @@ DEFINE_FUNCTION art_quick_proxy_invoke_handler movd %eax, %xmm0 // place return value also into floating point return value movd %edx, %xmm1 punpckldq %xmm1, %xmm0 - addl LITERAL(76), %esp // pop arguments - CFI_ADJUST_CFA_OFFSET(-76) + addl LITERAL(16 + FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE), %esp + CFI_ADJUST_CFA_OFFSET(-(16 + FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - 
FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)) + RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME RETURN_OR_DELIVER_PENDING_EXCEPTION // return or deliver exception END_FUNCTION art_quick_proxy_invoke_handler diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 66dfe5a432..5c741a5f61 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1334,9 +1334,8 @@ DEFINE_FUNCTION art_quick_proxy_invoke_handler movq %gs:THREAD_SELF_OFFSET, %rdx // Pass Thread::Current(). movq %rsp, %rcx // Pass SP. call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP) + RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME movq %rax, %xmm0 // Copy return value in case of float returns. - addq LITERAL(168 + 4*8), %rsp // Pop arguments. - CFI_ADJUST_CFA_OFFSET(-168 - 4*8) RETURN_OR_DELIVER_PENDING_EXCEPTION END_FUNCTION art_quick_proxy_invoke_handler diff --git a/runtime/barrier.cc b/runtime/barrier.cc index d21f551209..0d842cc261 100644 --- a/runtime/barrier.cc +++ b/runtime/barrier.cc @@ -16,6 +16,7 @@ #include "barrier.h" +#include "base/logging.h" #include "base/mutex.h" #include "base/time_utils.h" #include "thread.h" @@ -87,7 +88,14 @@ void Barrier::SetCountLocked(Thread* self, int count) { } Barrier::~Barrier() { - CHECK_EQ(count_, 0) << "Attempted to destroy barrier with non zero count"; + if (gAborting == 0) { + // Only check when not aborting. + CHECK_EQ(count_, 0) << "Attempted to destroy barrier with non zero count"; + } else { + if (count_ != 0) { + LOG(WARNING) << "Attempted to destroy barrier with non zero count " << count_; + } + } } } // namespace art diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h index 8daf6d4c9e..f2c8355f53 100644 --- a/runtime/base/hash_set.h +++ b/runtime/base/hash_set.h @@ -469,8 +469,6 @@ class HashSet { } // Resize based on the minimum load factor. Resize(min_index); - // When we hit elements_until_expand_, we are at the max load factor and must expand again. - elements_until_expand_ = NumBuckets() * max_load_factor_; } // Expand / shrink the table to the new specified size. @@ -493,11 +491,18 @@ class HashSet { if (owned_data) { allocfn_.deallocate(old_data, old_num_buckets); } + + // When we hit elements_until_expand_, we are at the max load factor and must expand again. + elements_until_expand_ = NumBuckets() * max_load_factor_; } ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const { + DCHECK_LT(index, NumBuckets()); // Don't try to get a slot out of range. + size_t non_empty_count = 0; while (!emptyfn_.IsEmpty(data_[index])) { index = NextIndex(index); + non_empty_count++; + DCHECK_LE(non_empty_count, NumBuckets()); // Don't loop forever. } return index; } @@ -526,7 +531,7 @@ class HashSet { Pred pred_; // Equals function. size_t num_elements_; // Number of inserted elements. size_t num_buckets_; // Number of hash table buckets. - size_t elements_until_expand_; // Maxmimum number of elements until we expand the table. + size_t elements_until_expand_; // Maximum number of elements until we expand the table. bool owns_data_; // If we own data_ and are responsible for freeing it. T* data_; // Backing storage. 
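// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of the patch above): the
// hash_set.h change keeps the invariant "expand once the element count reaches
// bucket_count * max_load_factor" correct after *any* resize, by recomputing
// the threshold inside Resize() itself. A minimal standalone model of that
// bookkeeping; the names are the editor's own.
#include <cstddef>

struct LoadFactorModel {
  size_t num_buckets;
  double max_load_factor;  // e.g. 0.7 in a typical open-addressing table.

  size_t ElementsUntilExpand() const {
    return static_cast<size_t>(num_buckets * max_load_factor);
  }
  bool NeedsExpand(size_t num_elements) const {
    return num_elements >= ElementsUntilExpand();
  }
};
// After Resize(new_bucket_count) the threshold must be derived from the *new*
// bucket count, which is what the TestShrink case below exercises via
// ShrinkToMaximumLoad() and CalculateLoadFactor().
// ---------------------------------------------------------------------------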
double min_load_factor_; diff --git a/runtime/base/hash_set_test.cc b/runtime/base/hash_set_test.cc index e88637ffa5..fd9eb45e3f 100644 --- a/runtime/base/hash_set_test.cc +++ b/runtime/base/hash_set_test.cc @@ -156,6 +156,38 @@ TEST_F(HashSetTest, TestSwap) { } } +TEST_F(HashSetTest, TestShrink) { + HashSet<std::string, IsEmptyFnString> hash_set; + std::vector<std::string> strings = {"a", "b", "c", "d", "e", "f", "g"}; + for (size_t i = 0; i < strings.size(); ++i) { + // Insert some strings into the beginning of our hash set to establish an initial size + hash_set.Insert(strings[i]); + } + + hash_set.ShrinkToMaximumLoad(); + const double initial_load = hash_set.CalculateLoadFactor(); + + // Insert a bunch of random strings to guarantee that we grow the capacity. + std::vector<std::string> random_strings; + static constexpr size_t count = 1000; + for (size_t i = 0; i < count; ++i) { + random_strings.push_back(RandomString(10)); + hash_set.Insert(random_strings[i]); + } + + // Erase all the extra strings which guarantees that our load factor will be really bad. + for (size_t i = 0; i < count; ++i) { + hash_set.Erase(hash_set.Find(random_strings[i])); + } + + const double bad_load = hash_set.CalculateLoadFactor(); + EXPECT_GT(initial_load, bad_load); + + // Shrink again, the load factor should be good again. + hash_set.ShrinkToMaximumLoad(); + EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor()); +} + TEST_F(HashSetTest, TestStress) { HashSet<std::string, IsEmptyFnString> hash_set; std::unordered_multiset<std::string> std_set; diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index 0ab148e37e..aa91ca161d 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -61,6 +61,7 @@ enum LockLevel { kAbortLock, kJdwpSocketLock, kRegionSpaceRegionLock, + kTransactionLogLock, kReferenceQueueSoftReferencesLock, kReferenceQueuePhantomReferencesLock, kReferenceQueueFinalizerReferencesLock, @@ -77,7 +78,6 @@ enum LockLevel { kDexFileMethodInlinerLock, kDexFileToMethodInlinerMapLock, kMarkSweepMarkStackLock, - kTransactionLogLock, kInternTableLock, kOatFileSecondaryLookupLock, kDefaultMutexLevel, diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 98fa897637..91b50006a9 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -1310,7 +1310,7 @@ void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) { // reinit references to when reinitializing a ClassLinker from a // mapped image. 
void ClassLinker::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) { - class_roots_.VisitRoot(visitor, RootInfo(kRootVMInternal)); + class_roots_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal)); Thread* const self = Thread::Current(); { ReaderMutexLock mu(self, dex_lock_); @@ -1333,9 +1333,9 @@ void ClassLinker::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) { } } VisitClassRoots(visitor, flags); - array_iftable_.VisitRoot(visitor, RootInfo(kRootVMInternal)); - for (size_t i = 0; i < kFindArrayCacheSize; ++i) { - find_array_class_cache_[i].VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal)); + array_iftable_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal)); + for (GcRoot<mirror::Class>& root : find_array_class_cache_) { + root.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal)); } } @@ -4928,8 +4928,7 @@ bool ClassLinker::LinkInterfaceMethods(Thread* self, Handle<mirror::Class> klass } } if (miranda_method == nullptr) { - size_t size = ArtMethod::ObjectSize(image_pointer_size_); - miranda_method = reinterpret_cast<ArtMethod*>(allocator.Alloc(size)); + miranda_method = reinterpret_cast<ArtMethod*>(allocator.Alloc(method_size)); CHECK(miranda_method != nullptr); // Point the interface table at a phantom slot. new(miranda_method) ArtMethod(*interface_method, image_pointer_size_); @@ -4970,50 +4969,49 @@ bool ClassLinker::LinkInterfaceMethods(Thread* self, Handle<mirror::Class> klass } StrideIterator<ArtMethod> out( reinterpret_cast<uintptr_t>(virtuals) + old_method_count * method_size, method_size); - // Copy the mirada methods before making a copy of the vtable so that moving GC doesn't miss - // any roots. This is necessary since these miranda methods wont get their roots visited from - // the class table root visiting until they are copied to the new virtuals array. - const size_t old_vtable_count = vtable->GetLength(); - const size_t new_vtable_count = old_vtable_count + miranda_methods.size(); - size_t method_idx = old_vtable_count; - for (auto* mir_method : miranda_methods) { - ArtMethod* out_method = &*out; - // Leave the declaring class alone as type indices are relative to it - out_method->CopyFrom(mir_method, image_pointer_size_); - out_method->SetAccessFlags(out_method->GetAccessFlags() | kAccMiranda); - out_method->SetMethodIndex(0xFFFF & method_idx); - move_table.emplace(mir_method, out_method); + // Copy over miranda methods before copying vtable since CopyOf may cause thread suspension and + // we want the roots of the miranda methods to get visited. + for (ArtMethod* mir_method : miranda_methods) { + out->CopyFrom(mir_method, image_pointer_size_); + out->SetAccessFlags(out->GetAccessFlags() | kAccMiranda); + move_table.emplace(mir_method, &*out); ++out; - ++method_idx; } - DCHECK_EQ(new_vtable_count, method_idx); UpdateClassVirtualMethods(klass.Get(), virtuals, new_method_count); - // Done copying methods, they are all reachable from the class now, so we can end the no thread + // Done copying methods, they are all roots in the class now, so we can end the no thread // suspension assert. 
self->EndAssertNoThreadSuspension(old_cause); + + const size_t old_vtable_count = vtable->GetLength(); + const size_t new_vtable_count = old_vtable_count + miranda_methods.size(); + miranda_methods.clear(); vtable.Assign(down_cast<mirror::PointerArray*>(vtable->CopyOf(self, new_vtable_count))); if (UNLIKELY(vtable.Get() == nullptr)) { self->AssertPendingOOMException(); return false; } + out = StrideIterator<ArtMethod>( + reinterpret_cast<uintptr_t>(virtuals) + old_method_count * method_size, method_size); + size_t vtable_pos = old_vtable_count; + for (size_t i = old_method_count; i < new_method_count; ++i) { + // Leave the declaring class alone as type indices are relative to it + out->SetMethodIndex(0xFFFF & vtable_pos); + vtable->SetElementPtrSize(vtable_pos, &*out, image_pointer_size_); + ++out; + ++vtable_pos; + } + CHECK_EQ(vtable_pos, new_vtable_count); // Update old vtable methods. - for (method_idx = 0; method_idx < old_vtable_count; ++method_idx) { - auto* m = vtable->GetElementPtrSize<ArtMethod*>(method_idx, image_pointer_size_); + for (size_t i = 0; i < old_vtable_count; ++i) { + auto* m = vtable->GetElementPtrSize<ArtMethod*>(i, image_pointer_size_); DCHECK(m != nullptr) << PrettyClass(klass.Get()); auto it = move_table.find(m); if (it != move_table.end()) { auto* new_m = it->second; DCHECK(new_m != nullptr) << PrettyClass(klass.Get()); - vtable->SetElementPtrSize(method_idx, new_m, image_pointer_size_); + vtable->SetElementPtrSize(i, new_m, image_pointer_size_); } } - // Update miranda methods. - out = StrideIterator<ArtMethod>( - reinterpret_cast<uintptr_t>(virtuals) + old_method_count * method_size, method_size); - for (; method_idx < new_vtable_count; ++method_idx) { - vtable->SetElementPtrSize(method_idx, &*out, image_pointer_size_); - ++out; - } klass->SetVTable(vtable.Get()); // Go fix up all the stale miranda pointers. @@ -5214,7 +5212,7 @@ bool ClassLinker::LinkFields(Thread* self, Handle<mirror::Class> klass, bool is_ ArtField* field = &fields[i]; VLOG(class_linker) << "LinkFields: " << (is_static ? "static" : "instance") << " class=" << PrettyClass(klass.Get()) << " field=" << PrettyField(field) << " offset=" - << field->GetOffset(); + << field->GetOffsetDuringLinking(); if (i != 0) { ArtField* const prev_field = &fields[i - 1]; // NOTE: The field names can be the same. This is not possible in the Java language diff --git a/runtime/debugger.cc b/runtime/debugger.cc index f84e10c628..f04f3563ca 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -54,10 +54,6 @@ #include "verifier/method_verifier-inl.h" #include "well_known_classes.h" -#ifdef HAVE_ANDROID_OS -#include "cutils/properties.h" -#endif - namespace art { // The key identifying the debugger to update instrumentation. @@ -65,22 +61,7 @@ static constexpr const char* kDbgInstrumentationKey = "Debugger"; // Limit alloc_record_count to the 2BE value (64k-1) that is the limit of the current protocol. static uint16_t CappedAllocRecordCount(size_t alloc_record_count) { - size_t cap = 0xffff; -#ifdef HAVE_ANDROID_OS - // Check whether there's a system property overriding the number of recent records. 
- const char* propertyName = "dalvik.vm.recentAllocMax"; - char recentAllocMaxString[PROPERTY_VALUE_MAX]; - if (property_get(propertyName, recentAllocMaxString, "") > 0) { - char* end; - size_t value = strtoul(recentAllocMaxString, &end, 10); - if (*end != '\0') { - LOG(ERROR) << "Ignoring " << propertyName << " '" << recentAllocMaxString - << "' --- invalid"; - } else { - cap = value; - } - } -#endif + const size_t cap = 0xffff; if (alloc_record_count > cap) { return cap; } @@ -4725,7 +4706,7 @@ void Dbg::DumpRecentAllocations() { gc::AllocRecordObjectMap* records = Runtime::Current()->GetHeap()->GetAllocationRecords(); CHECK(records != nullptr); - const uint16_t capped_count = CappedAllocRecordCount(records->Size()); + const uint16_t capped_count = CappedAllocRecordCount(records->GetRecentAllocationSize()); uint16_t count = capped_count; LOG(INFO) << "Tracked allocations, (count=" << count << ")"; @@ -4863,7 +4844,7 @@ jbyteArray Dbg::GetRecentAllocations() { StringTable method_names; StringTable filenames; - const uint16_t capped_count = CappedAllocRecordCount(records->Size()); + const uint16_t capped_count = CappedAllocRecordCount(records->GetRecentAllocationSize()); uint16_t count = capped_count; for (auto it = records->RBegin(), end = records->REnd(); count > 0 && it != end; count--, it++) { diff --git a/runtime/dex_instruction-inl.h b/runtime/dex_instruction-inl.h index dd65f2c0c6..7344d13805 100644 --- a/runtime/dex_instruction-inl.h +++ b/runtime/dex_instruction-inl.h @@ -223,6 +223,7 @@ inline bool Instruction::HasVRegB() const { case k22t: return true; case k22x: return true; case k23x: return true; + case k25x: return true; case k31c: return true; case k31i: return true; case k31t: return true; @@ -252,6 +253,7 @@ inline int32_t Instruction::VRegB() const { case k22t: return VRegB_22t(); case k22x: return VRegB_22x(); case k23x: return VRegB_23x(); + case k25x: return VRegB_25x(); case k31c: return VRegB_31c(); case k31i: return VRegB_31i(); case k31t: return VRegB_31t(); @@ -329,6 +331,12 @@ inline uint8_t Instruction::VRegB_23x() const { return static_cast<uint8_t>(Fetch16(1) & 0xff); } +// Number of additional registers in this instruction. # of var arg registers = this value + 1. 
+inline uint4_t Instruction::VRegB_25x() const { + DCHECK_EQ(FormatOf(Opcode()), k25x); + return InstB(Fetch16(0)); +} + inline uint32_t Instruction::VRegB_31c() const { DCHECK_EQ(FormatOf(Opcode()), k31c); return Fetch32(1); @@ -375,6 +383,7 @@ inline bool Instruction::HasVRegC() const { case k22s: return true; case k22t: return true; case k23x: return true; + case k25x: return true; case k35c: return true; case k3rc: return true; default: return false; @@ -388,6 +397,7 @@ inline int32_t Instruction::VRegC() const { case k22s: return VRegC_22s(); case k22t: return VRegC_22t(); case k23x: return VRegC_23x(); + case k25x: return VRegC_25x(); case k35c: return VRegC_35c(); case k3rc: return VRegC_3rc(); default: @@ -421,6 +431,11 @@ inline uint8_t Instruction::VRegC_23x() const { return static_cast<uint8_t>(Fetch16(1) >> 8); } +inline uint4_t Instruction::VRegC_25x() const { + DCHECK_EQ(FormatOf(Opcode()), k25x); + return static_cast<uint4_t>(Fetch16(1) & 0xf); +} + inline uint4_t Instruction::VRegC_35c() const { DCHECK_EQ(FormatOf(Opcode()), k35c); return static_cast<uint4_t>(Fetch16(2) & 0x0f); @@ -431,11 +446,78 @@ inline uint16_t Instruction::VRegC_3rc() const { return Fetch16(2); } -inline bool Instruction::HasVarArgs() const { +inline bool Instruction::HasVarArgs35c() const { return FormatOf(Opcode()) == k35c; } -inline void Instruction::GetVarArgs(uint32_t arg[5], uint16_t inst_data) const { +inline bool Instruction::HasVarArgs25x() const { + return FormatOf(Opcode()) == k25x; +} + +// Copies all of the parameter registers into the arg array. Check the length with VRegB_25x()+1. +inline void Instruction::GetAllArgs25x(uint32_t arg[kMaxVarArgRegs]) const { + DCHECK_EQ(FormatOf(Opcode()), k25x); + + /* + * The opcode looks like this: + * op vC, {vD, vE, vF, vG} + * + * and vB is the (implicit) register count (0-4) which denotes how far from vD to vG to read. + * + * vC is always present, so with "op vC, {}" the register count will be 0 even though vC + * is valid. + * + * The exact semantic meanings of vC:vG is up to the instruction using the format. + * + * Encoding drawing as a bit stream: + * (Note that each uint16 is little endian, and each register takes up 4 bits) + * + * uint16 ||| uint16 + * 7-0 15-8 7-0 15-8 + * |------|-----|||-----|-----| + * |opcode|vB|vG|||vD|vC|vF|vE| + * |------|-----|||-----|-----| + */ + uint16_t reg_list = Fetch16(1); + uint4_t count = VRegB_25x(); + DCHECK_LE(count, 4U) << "Invalid arg count in 25x (" << count << ")"; + + /* + * TODO(iam): Change instruction encoding to one of: + * + * - (X) vA = args count, vB = closure register, {vC..vG} = args (25x) + * - (Y) vA = args count, vB = method index, {vC..vG} = args (35x) + * + * (do this in conjunction with adding verifier support for invoke-lambda) + */ + + /* + * Copy the argument registers into the arg[] array, and + * also copy the first argument into vC. (The + * DecodedInstruction structure doesn't have separate + * fields for {vD, vE, vF, vG}, so there's no need to make + * copies of those.) Note that all cases fall-through. 
+ */ + switch (count) { + case 4: + arg[4] = (Fetch16(0) >> 8) & 0x0f; // vG + FALLTHROUGH_INTENDED; + case 3: + arg[3] = (reg_list >> 12) & 0x0f; // vF + FALLTHROUGH_INTENDED; + case 2: + arg[2] = (reg_list >> 8) & 0x0f; // vE + FALLTHROUGH_INTENDED; + case 1: + arg[1] = (reg_list >> 4) & 0x0f; // vD + FALLTHROUGH_INTENDED; + default: // case 0 + arg[0] = VRegC_25x(); // vC + break; + } +} + +inline void Instruction::GetVarArgs(uint32_t arg[kMaxVarArgRegs], uint16_t inst_data) const { DCHECK_EQ(FormatOf(Opcode()), k35c); /* diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc index 69fe87464d..537fa154b1 100644 --- a/runtime/dex_instruction.cc +++ b/runtime/dex_instruction.cc @@ -63,7 +63,7 @@ int const Instruction::kInstructionSizeInCodeUnits[] = { #define INSTRUCTION_SIZE(opcode, c, p, format, r, i, a, v) \ ((opcode == NOP) ? -1 : \ ((format >= k10x) && (format <= k10t)) ? 1 : \ - ((format >= k20t) && (format <= k22c)) ? 2 : \ + ((format >= k20t) && (format <= k25x)) ? 2 : \ ((format >= k32x) && (format <= k3rc)) ? 3 : \ (format == k51l) ? 5 : -1), #include "dex_instruction_list.h" @@ -224,6 +224,14 @@ std::string Instruction::DumpString(const DexFile* file) const { break; } FALLTHROUGH_INTENDED; + case CREATE_LAMBDA: + if (file != nullptr) { + uint32_t method_idx = VRegB_21c(); + os << opcode << " v" << static_cast<int>(VRegA_21c()) << ", " << PrettyMethod(method_idx, *file, true) + << " // method@" << method_idx; + break; + } + FALLTHROUGH_INTENDED; default: os << StringPrintf("%s v%d, thing@%d", opcode, VRegA_21c(), VRegB_21c()); break; @@ -304,6 +312,26 @@ std::string Instruction::DumpString(const DexFile* file) const { } break; } + case k25x: { + if (Opcode() == INVOKE_LAMBDA) { + uint32_t arg[kMaxVarArgRegs]; + GetAllArgs25x(arg); + const size_t num_extra_var_args = VRegB_25x(); + DCHECK_LE(num_extra_var_args + 1, kMaxVarArgRegs); + + // invoke-lambda vC, {vD, vE, vF, vG} + os << opcode << " v" << arg[0] << ", {"; + for (size_t i = 0; i < num_extra_var_args; ++i) { + if (i != 0) { + os << ", "; + } + os << "v" << arg[i+1]; + } + os << "}"; + break; + } + FALLTHROUGH_INTENDED; + } case k32x: os << StringPrintf("%s v%d, v%d", opcode, VRegA_32x(), VRegB_32x()); break; case k30t: os << StringPrintf("%s %+d", opcode, VRegA_30t()); break; case k31t: os << StringPrintf("%s v%d, %+d", opcode, VRegA_31t(), VRegB_31t()); break; diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h index c64c21e45d..b043abaa33 100644 --- a/runtime/dex_instruction.h +++ b/runtime/dex_instruction.h @@ -105,6 +105,7 @@ class Instruction { k22t, // op vA, vB, +CCCC k22s, // op vA, vB, #+CCCC k22c, // op vA, vB, thing@CCCC + k25x, // op vC, {vD, vE, vF, vG} (B: count) k32x, // op vAAAA, vBBBB k30t, // op +AAAAAAAA k31t, // op vAA, +BBBBBBBB @@ -116,30 +117,31 @@ class Instruction { }; enum Flags { - kBranch = 0x000001, // conditional or unconditional branch - kContinue = 0x000002, // flow can continue to next statement - kSwitch = 0x000004, // switch statement - kThrow = 0x000008, // could cause an exception to be thrown - kReturn = 0x000010, // returns, no additional statements - kInvoke = 0x000020, // a flavor of invoke - kUnconditional = 0x000040, // unconditional branch - kAdd = 0x000080, // addition - kSubtract = 0x000100, // subtract - kMultiply = 0x000200, // multiply - kDivide = 0x000400, // division - kRemainder = 0x000800, // remainder - kAnd = 0x001000, // and - kOr = 0x002000, // or - kXor = 0x004000, // xor - kShl = 0x008000, // shl - kShr = 0x010000, // shr - kUshr = 
0x020000, // ushr - kCast = 0x040000, // cast - kStore = 0x080000, // store opcode - kLoad = 0x100000, // load opcode - kClobber = 0x200000, // clobbers memory in a big way (not just a write) - kRegCFieldOrConstant = 0x400000, // is the third virtual register a field or literal constant (vC) - kRegBFieldOrConstant = 0x800000, // is the second virtual register a field or literal constant (vB) + kBranch = 0x0000001, // conditional or unconditional branch + kContinue = 0x0000002, // flow can continue to next statement + kSwitch = 0x0000004, // switch statement + kThrow = 0x0000008, // could cause an exception to be thrown + kReturn = 0x0000010, // returns, no additional statements + kInvoke = 0x0000020, // a flavor of invoke + kUnconditional = 0x0000040, // unconditional branch + kAdd = 0x0000080, // addition + kSubtract = 0x0000100, // subtract + kMultiply = 0x0000200, // multiply + kDivide = 0x0000400, // division + kRemainder = 0x0000800, // remainder + kAnd = 0x0001000, // and + kOr = 0x0002000, // or + kXor = 0x0004000, // xor + kShl = 0x0008000, // shl + kShr = 0x0010000, // shr + kUshr = 0x0020000, // ushr + kCast = 0x0040000, // cast + kStore = 0x0080000, // store opcode + kLoad = 0x0100000, // load opcode + kClobber = 0x0200000, // clobbers memory in a big way (not just a write) + kRegCFieldOrConstant = 0x0400000, // is the third virtual register a field or literal constant (vC) + kRegBFieldOrConstant = 0x0800000, // is the second virtual register a field or literal constant (vB) + kExperimental = 0x1000000, // is an experimental opcode }; enum VerifyFlag { @@ -205,7 +207,7 @@ class Instruction { // Returns a pointer to the instruction after this 2xx instruction in the stream. const Instruction* Next_2xx() const { - DCHECK(FormatOf(Opcode()) >= k20t && FormatOf(Opcode()) <= k22c); + DCHECK(FormatOf(Opcode()) >= k20t && FormatOf(Opcode()) <= k25x); return RelativeAt(2); } @@ -355,6 +357,7 @@ class Instruction { } uint16_t VRegB_22x() const; uint8_t VRegB_23x() const; + uint4_t VRegB_25x() const; uint32_t VRegB_31c() const; int32_t VRegB_31i() const; int32_t VRegB_31t() const; @@ -381,15 +384,20 @@ class Instruction { int16_t VRegC_22s() const; int16_t VRegC_22t() const; uint8_t VRegC_23x() const; + uint4_t VRegC_25x() const; uint4_t VRegC_35c() const; uint16_t VRegC_3rc() const; // Fills the given array with the 'arg' array of the instruction. - bool HasVarArgs() const; + bool HasVarArgs35c() const; + bool HasVarArgs25x() const; + + // TODO(iam): Make this name more consistent with GetAllArgs25x by including the opcode format. void GetVarArgs(uint32_t args[kMaxVarArgRegs], uint16_t inst_data) const; void GetVarArgs(uint32_t args[kMaxVarArgRegs]) const { return GetVarArgs(args, Fetch16(0)); } + void GetAllArgs25x(uint32_t args[kMaxVarArgRegs]) const; // Returns the opcode field of the instruction. The given "inst_data" parameter must be the first // 16 bits of instruction. @@ -489,6 +497,11 @@ class Instruction { return (kInstructionFlags[Opcode()] & kInvoke) != 0; } + // Determine if this instruction is experimental. 
+ bool IsExperimental() const { + return (kInstructionFlags[Opcode()] & kExperimental) != 0; + } + int GetVerifyTypeArgumentA() const { return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegA | kVerifyRegAWide)); } diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h index f8f85f9911..803d58d776 100644 --- a/runtime/dex_instruction_list.h +++ b/runtime/dex_instruction_list.h @@ -261,13 +261,14 @@ V(0xF0, IGET_BYTE_QUICK, "iget-byte-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \ V(0xF1, IGET_CHAR_QUICK, "iget-char-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \ V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \ - V(0xF3, UNUSED_F3, "unused-f3", k10x, false, kUnknown, 0, kVerifyError) \ + V(0xF3, INVOKE_LAMBDA, "invoke-lambda", k25x, false, kNone, kContinue | kThrow | kInvoke | kExperimental, kVerifyRegC /*TODO: | kVerifyVarArg*/) \ V(0xF4, UNUSED_F4, "unused-f4", k10x, false, kUnknown, 0, kVerifyError) \ V(0xF5, UNUSED_F5, "unused-f5", k10x, false, kUnknown, 0, kVerifyError) \ - V(0xF6, UNUSED_F6, "unused-f6", k10x, false, kUnknown, 0, kVerifyError) \ + /* TODO(iam): get rid of the unused 'false' column */ \ + V(0xF6, CREATE_LAMBDA, "create-lambda", k21c, false_UNUSED, kMethodRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegBMethod) \ V(0xF7, UNUSED_F7, "unused-f7", k10x, false, kUnknown, 0, kVerifyError) \ - V(0xF8, UNUSED_F8, "unused-f8", k10x, false, kUnknown, 0, kVerifyError) \ - V(0xF9, UNUSED_F9, "unused-f9", k10x, false, kUnknown, 0, kVerifyError) \ + V(0xF8, BOX_LAMBDA, "box-lambda", k22x, true, kNone, kContinue | kExperimental, kVerifyRegA | kVerifyRegB) \ + V(0xF9, UNBOX_LAMBDA, "unbox-lambda", k22c, true, kTypeRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \ V(0xFA, UNUSED_FA, "unused-fa", k10x, false, kUnknown, 0, kVerifyError) \ V(0xFB, UNUSED_FB, "unused-fb", k10x, false, kUnknown, 0, kVerifyError) \ V(0xFC, UNUSED_FC, "unused-fc", k10x, false, kUnknown, 0, kVerifyError) \ @@ -292,6 +293,7 @@ V(k22t) \ V(k22s) \ V(k22c) \ + V(k25x) \ V(k32x) \ V(k30t) \ V(k31t) \ diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc index a385363428..ac7de631d9 100644 --- a/runtime/gc/allocation_record.cc +++ b/runtime/gc/allocation_record.cc @@ -45,10 +45,29 @@ void AllocRecordObjectMap::SetProperties() { << "' --- invalid"; } else { alloc_record_max_ = value; + if (recent_record_max_ > value) { + recent_record_max_ = value; + } + } + } + // Check whether there's a system property overriding the number of recent records. + propertyName = "dalvik.vm.recentAllocMax"; + char recentAllocMaxString[PROPERTY_VALUE_MAX]; + if (property_get(propertyName, recentAllocMaxString, "") > 0) { + char* end; + size_t value = strtoul(recentAllocMaxString, &end, 10); + if (*end != '\0') { + LOG(ERROR) << "Ignoring " << propertyName << " '" << recentAllocMaxString + << "' --- invalid"; + } else if (value > alloc_record_max_) { + LOG(ERROR) << "Ignoring " << propertyName << " '" << recentAllocMaxString + << "' --- should be less than " << alloc_record_max_; + } else { + recent_record_max_ = value; } } // Check whether there's a system property overriding the max depth of stack trace. 
- propertyName = "dalvik.vm.allocStackDepth"; + propertyName = "debug.allocTracker.stackDepth"; char stackDepthString[PROPERTY_VALUE_MAX]; if (property_get(propertyName, stackDepthString, "") > 0) { char* end; @@ -56,6 +75,10 @@ void AllocRecordObjectMap::SetProperties() { if (*end != '\0') { LOG(ERROR) << "Ignoring " << propertyName << " '" << stackDepthString << "' --- invalid"; + } else if (value > kMaxSupportedStackDepth) { + LOG(WARNING) << propertyName << " '" << stackDepthString << "' too large, using " + << kMaxSupportedStackDepth; + max_stack_depth_ = kMaxSupportedStackDepth; } else { max_stack_depth_ = value; } @@ -67,6 +90,20 @@ AllocRecordObjectMap::~AllocRecordObjectMap() { STLDeleteValues(&entries_); } +void AllocRecordObjectMap::VisitRoots(RootVisitor* visitor) { + CHECK_LE(recent_record_max_, alloc_record_max_); + BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(visitor, RootInfo(kRootDebugger)); + size_t count = recent_record_max_; + // Only visit the last recent_record_max_ number of objects in entries_. + // They need to be retained for DDMS's recent allocation tracking. + // TODO: This will cause 098-ddmc test to run out of memory for GC stress test. + // There should be an option that do not keep these objects live if allocation tracking is only + // for the purpose of an HPROF dump. b/20037135 + for (auto it = entries_.rbegin(), end = entries_.rend(); count > 0 && it != end; count--, ++it) { + buffered_visitor.VisitRoot(it->first); + } +} + void AllocRecordObjectMap::SweepAllocationRecords(IsMarkedCallback* callback, void* arg) { VLOG(heap) << "Start SweepAllocationRecords()"; size_t count_deleted = 0, count_moved = 0; @@ -139,6 +176,7 @@ void AllocRecordObjectMap::SetAllocTrackingEnabled(bool enable) { if (self_name == "JDWP") { records->alloc_ddm_thread_id_ = self->GetTid(); } + records->scratch_trace_.SetDepth(records->max_stack_depth_); size_t sz = sizeof(AllocRecordStackTraceElement) * records->max_stack_depth_ + sizeof(AllocRecord) + sizeof(AllocRecordStackTrace); LOG(INFO) << "Enabling alloc tracker (" << records->alloc_record_max_ << " entries of " @@ -181,19 +219,14 @@ void AllocRecordObjectMap::RecordAllocation(Thread* self, mirror::Object* obj, s DCHECK_LE(records->Size(), records->alloc_record_max_); - // Remove oldest record. - if (records->Size() == records->alloc_record_max_) { - records->RemoveOldest(); - } - // Get stack trace. - const size_t max_depth = records->max_stack_depth_; - AllocRecordStackTrace* trace = new AllocRecordStackTrace(self->GetTid(), max_depth); - // add scope to make "visitor" destroyed promptly, in order to set the trace->depth_ + // add scope to make "visitor" destroyed promptly, in order to set the scratch_trace_->depth_ { - AllocRecordStackVisitor visitor(self, trace, max_depth); + AllocRecordStackVisitor visitor(self, &records->scratch_trace_, records->max_stack_depth_); visitor.WalkStack(); } + records->scratch_trace_.SetTid(self->GetTid()); + AllocRecordStackTrace* trace = new AllocRecordStackTrace(records->scratch_trace_); // Fill in the basics. 
AllocRecord* record = new AllocRecord(byte_count, trace); diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h index 45b3406cea..5214b6b476 100644 --- a/runtime/gc/allocation_record.h +++ b/runtime/gc/allocation_record.h @@ -71,8 +71,15 @@ class AllocRecordStackTrace { public: static constexpr size_t kHashMultiplier = 17; - AllocRecordStackTrace(pid_t tid, size_t max_depth) - : tid_(tid), depth_(0), stack_(new AllocRecordStackTraceElement[max_depth]) {} + explicit AllocRecordStackTrace(size_t max_depth) + : tid_(0), depth_(0), stack_(new AllocRecordStackTraceElement[max_depth]) {} + + AllocRecordStackTrace(const AllocRecordStackTrace& r) + : tid_(r.tid_), depth_(r.depth_), stack_(new AllocRecordStackTraceElement[r.depth_]) { + for (size_t i = 0; i < depth_; ++i) { + stack_[i] = r.stack_[i]; + } + } ~AllocRecordStackTrace() { delete[] stack_; @@ -82,6 +89,10 @@ class AllocRecordStackTrace { return tid_; } + void SetTid(pid_t t) { + tid_ = t; + } + size_t GetDepth() const { return depth_; } @@ -102,6 +113,7 @@ class AllocRecordStackTrace { bool operator==(const AllocRecordStackTrace& other) const { if (this == &other) return true; + if (tid_ != other.tid_) return false; if (depth_ != other.depth_) return false; for (size_t i = 0; i < depth_; ++i) { if (!(stack_[i] == other.stack_[i])) return false; @@ -110,7 +122,7 @@ class AllocRecordStackTrace { } private: - const pid_t tid_; + pid_t tid_; size_t depth_; AllocRecordStackTraceElement* const stack_; }; @@ -200,7 +212,9 @@ class AllocRecordObjectMap { AllocRecordObjectMap() EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) : alloc_record_max_(kDefaultNumAllocRecords), + recent_record_max_(kDefaultNumRecentRecords), max_stack_depth_(kDefaultAllocStackDepth), + scratch_trace_(kMaxSupportedStackDepth), alloc_ddm_thread_id_(0) {} ~AllocRecordObjectMap(); @@ -208,6 +222,10 @@ class AllocRecordObjectMap { void Put(mirror::Object* obj, AllocRecord* record) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + if (entries_.size() == alloc_record_max_) { + delete entries_.front().second; + entries_.pop_front(); + } entries_.emplace_back(GcRoot<mirror::Object>(obj), record); } @@ -215,17 +233,19 @@ class AllocRecordObjectMap { return entries_.size(); } - void SweepAllocationRecords(IsMarkedCallback* callback, void* arg) + size_t GetRecentAllocationSize() const SHARED_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + CHECK_LE(recent_record_max_, alloc_record_max_); + size_t sz = entries_.size(); + return std::min(recent_record_max_, sz); + } + + void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_); - void RemoveOldest() + void SweepAllocationRecords(IsMarkedCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { - DCHECK(!entries_.empty()); - delete entries_.front().second; - entries_.pop_front(); - } + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_); // TODO: Is there a better way to hide the entries_'s type? 
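// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of the patch above): with this
// change Put() behaves like a bounded FIFO -- once alloc_record_max_ entries
// exist, the oldest record is dropped before a new one is appended, and DDMS
// only reads back min(recent_record_max_, size) of the newest entries. A
// minimal standalone model with hypothetical names:
#include <algorithm>
#include <cstddef>
#include <deque>
#include <utility>

template <typename Record>
class BoundedRecordList {
 public:
  explicit BoundedRecordList(size_t max_records) : max_records_(max_records) {}

  void Put(Record r) {
    if (records_.size() == max_records_) {
      records_.pop_front();  // Evict the oldest entry, as in the patched Put().
    }
    records_.push_back(std::move(r));
  }

  size_t RecentSize(size_t recent_max) const {
    return std::min(recent_max, records_.size());  // Cf. GetRecentAllocationSize().
  }

 private:
  const size_t max_records_;
  std::deque<Record> records_;
};
// ---------------------------------------------------------------------------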
EntryList::iterator Begin() @@ -254,12 +274,13 @@ class AllocRecordObjectMap { private: static constexpr size_t kDefaultNumAllocRecords = 512 * 1024; - static constexpr size_t kDefaultAllocStackDepth = 4; + static constexpr size_t kDefaultNumRecentRecords = 64 * 1024 - 1; + static constexpr size_t kDefaultAllocStackDepth = 16; + static constexpr size_t kMaxSupportedStackDepth = 128; size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_); - // The implementation always allocates max_stack_depth_ number of frames for each stack trace. - // As long as the max depth is not very large, this is not a waste of memory since most stack - // traces will fill up the max depth number of the frames. + size_t recent_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_); size_t max_stack_depth_ GUARDED_BY(Locks::alloc_tracker_lock_); + AllocRecordStackTrace scratch_trace_ GUARDED_BY(Locks::alloc_tracker_lock_); pid_t alloc_ddm_thread_id_ GUARDED_BY(Locks::alloc_tracker_lock_); EntryList entries_ GUARDED_BY(Locks::alloc_tracker_lock_); diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 6984c1624f..c7d2e9f2c9 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -412,6 +412,11 @@ void ConcurrentCopying::MarkingPhase() { // the mark stack here once again. ProcessMarkStack(); CheckEmptyMarkQueue(); + if (kVerboseMode) { + LOG(INFO) << "AllowNewSystemWeaks"; + } + Runtime::Current()->AllowNewSystemWeaks(); + IssueEmptyCheckpoint(); // Disable marking. if (kUseTableLookupReadBarrier) { heap_->rb_table_->ClearAll(); @@ -419,10 +424,6 @@ void ConcurrentCopying::MarkingPhase() { } is_mark_queue_push_disallowed_.StoreSequentiallyConsistent(1); is_marking_ = false; - if (kVerboseMode) { - LOG(INFO) << "AllowNewSystemWeaks"; - } - Runtime::Current()->AllowNewSystemWeaks(); CheckEmptyMarkQueue(); } diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index ee4568ecea..0ed3b6d9dd 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -175,6 +175,13 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas } else { DCHECK(!IsAllocTrackingEnabled()); } + if (kInstrumented) { + if (gc_stress_mode_) { + CheckGcStressMode(self, &obj); + } + } else { + DCHECK(!gc_stress_mode_); + } // IsConcurrentGc() isn't known at compile time so we can optimize by not checking it for // the BumpPointer or TLAB allocators. This is nice since it allows the entire if statement to be // optimized out. And for the other allocators, AllocatorMayHaveConcurrentGC is a constant since @@ -392,7 +399,7 @@ inline bool Heap::ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) co // Zygote resulting in it being prematurely freed. // We can only do this for primitive objects since large objects will not be within the card table // range. This also means that we rely on SetClass not dirtying the object's card. - return byte_count >= large_object_threshold_ && c->IsPrimitiveArray(); + return byte_count >= large_object_threshold_ && (c->IsPrimitiveArray() || c->IsStringClass()); } template <bool kGrow> diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 3c020e2f2e..f0ba0bd25a 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -21,6 +21,7 @@ #include <limits> #include <memory> +#include <unwind.h> // For GC verification. 
#include <vector> #include "art_field-inl.h" @@ -125,7 +126,8 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max bool ignore_max_footprint, bool use_tlab, bool verify_pre_gc_heap, bool verify_pre_sweeping_heap, bool verify_post_gc_heap, bool verify_pre_gc_rosalloc, bool verify_pre_sweeping_rosalloc, - bool verify_post_gc_rosalloc, bool use_homogeneous_space_compaction_for_oom, + bool verify_post_gc_rosalloc, bool gc_stress_mode, + bool use_homogeneous_space_compaction_for_oom, uint64_t min_interval_homogeneous_space_compaction_by_oom) : non_moving_space_(nullptr), rosalloc_space_(nullptr), @@ -170,6 +172,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max verify_pre_gc_rosalloc_(verify_pre_gc_rosalloc), verify_pre_sweeping_rosalloc_(verify_pre_sweeping_rosalloc), verify_post_gc_rosalloc_(verify_post_gc_rosalloc), + gc_stress_mode_(gc_stress_mode), /* For GC a lot mode, we limit the allocations stacks to be kGcAlotInterval allocations. This * causes a lot of GC since we do a GC for alloc whenever the stack is full. When heap * verification is enabled, we limit the size of allocation stacks to speed up their @@ -210,13 +213,17 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max gc_count_rate_histogram_("gc count rate histogram", 1U, kGcCountRateMaxBucketCount), blocking_gc_count_rate_histogram_("blocking gc count rate histogram", 1U, kGcCountRateMaxBucketCount), - alloc_tracking_enabled_(false) { + alloc_tracking_enabled_(false), + backtrace_lock_(nullptr), + seen_backtrace_count_(0u), + unique_backtrace_count_(0u) { if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) { LOG(INFO) << "Heap() entering"; } + Runtime* const runtime = Runtime::Current(); // If we aren't the zygote, switch to the default non zygote allocator. This may update the // entrypoints. - const bool is_zygote = Runtime::Current()->IsZygote(); + const bool is_zygote = runtime->IsZygote(); if (!is_zygote) { // Background compaction is currently not supported for command line runs. 
if (background_collector_type_ != foreground_collector_type_) { @@ -508,8 +515,12 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max LOG(FATAL) << "There's a gap between the image space and the non-moving space"; } } - if (running_on_valgrind_) { - Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints(); + instrumentation::Instrumentation* const instrumentation = runtime->GetInstrumentation(); + if (gc_stress_mode_) { + backtrace_lock_ = new Mutex("GC complete lock"); + } + if (running_on_valgrind_ || gc_stress_mode_) { + instrumentation->InstrumentQuickAllocEntryPoints(); } if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) { LOG(INFO) << "Heap() exiting"; @@ -1074,6 +1085,12 @@ Heap::~Heap() { STLDeleteElements(&discontinuous_spaces_); delete gc_complete_lock_; delete pending_task_lock_; + delete backtrace_lock_; + if (unique_backtrace_count_.LoadRelaxed() != 0 || seen_backtrace_count_.LoadRelaxed() != 0) { + LOG(INFO) << "gc stress unique=" << unique_backtrace_count_.LoadRelaxed() + << " total=" << seen_backtrace_count_.LoadRelaxed() + + unique_backtrace_count_.LoadRelaxed(); + } VLOG(heap) << "Finished ~Heap()"; } @@ -3681,6 +3698,15 @@ void Heap::SetAllocationRecords(AllocRecordObjectMap* records) { allocation_records_.reset(records); } +void Heap::VisitAllocationRecords(RootVisitor* visitor) const { + if (IsAllocTrackingEnabled()) { + MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_); + if (IsAllocTrackingEnabled()) { + GetAllocationRecords()->VisitRoots(visitor); + } + } +} + void Heap::SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const { if (IsAllocTrackingEnabled()) { MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_); @@ -3690,5 +3716,73 @@ void Heap::SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const { } } +// Based on debug malloc logic from libc/bionic/debug_stacktrace.cpp. +class StackCrawlState { + public: + StackCrawlState(uintptr_t* frames, size_t max_depth, size_t skip_count) + : frames_(frames), frame_count_(0), max_depth_(max_depth), skip_count_(skip_count) { + } + size_t GetFrameCount() const { + return frame_count_; + } + static _Unwind_Reason_Code Callback(_Unwind_Context* context, void* arg) { + auto* const state = reinterpret_cast<StackCrawlState*>(arg); + const uintptr_t ip = _Unwind_GetIP(context); + // The first stack frame is get_backtrace itself. Skip it. + if (ip != 0 && state->skip_count_ > 0) { + --state->skip_count_; + return _URC_NO_REASON; + } + // ip may be off for ARM but it shouldn't matter since we only use it for hashing. + state->frames_[state->frame_count_] = ip; + state->frame_count_++; + return state->frame_count_ >= state->max_depth_ ? _URC_END_OF_STACK : _URC_NO_REASON; + } + + private: + uintptr_t* const frames_; + size_t frame_count_; + const size_t max_depth_; + size_t skip_count_; +}; + +static size_t get_backtrace(uintptr_t* frames, size_t max_depth) { + StackCrawlState state(frames, max_depth, 0u); + _Unwind_Backtrace(&StackCrawlState::Callback, &state); + return state.GetFrameCount(); +} + +void Heap::CheckGcStressMode(Thread* self, mirror::Object** obj) { + auto* const runtime = Runtime::Current(); + if (gc_stress_mode_ && runtime->GetClassLinker()->IsInitialized() && + !runtime->IsActiveTransaction() && mirror::Class::HasJavaLangClass()) { + // Check if we should GC. 
+ bool new_backtrace = false; + { + static constexpr size_t kMaxFrames = 16u; + uintptr_t backtrace[kMaxFrames]; + const size_t frames = get_backtrace(backtrace, kMaxFrames); + uint64_t hash = 0; + for (size_t i = 0; i < frames; ++i) { + hash = hash * 2654435761 + backtrace[i]; + hash += (hash >> 13) ^ (hash << 6); + } + MutexLock mu(self, *backtrace_lock_); + new_backtrace = seen_backtraces_.find(hash) == seen_backtraces_.end(); + if (new_backtrace) { + seen_backtraces_.insert(hash); + } + } + if (new_backtrace) { + StackHandleScope<1> hs(self); + auto h = hs.NewHandleWrapper(obj); + CollectGarbage(false); + unique_backtrace_count_.FetchAndAddSequentiallyConsistent(1); + } else { + seen_backtrace_count_.FetchAndAddSequentiallyConsistent(1); + } + } +} + } // namespace gc } // namespace art diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 18244c856b..54a323588c 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -19,6 +19,7 @@ #include <iosfwd> #include <string> +#include <unordered_set> #include <vector> #include "allocator_type.h" @@ -181,7 +182,8 @@ class Heap { bool ignore_max_footprint, bool use_tlab, bool verify_pre_gc_heap, bool verify_pre_sweeping_heap, bool verify_post_gc_heap, bool verify_pre_gc_rosalloc, bool verify_pre_sweeping_rosalloc, - bool verify_post_gc_rosalloc, bool use_homogeneous_space_compaction, + bool verify_post_gc_rosalloc, bool gc_stress_mode, + bool use_homogeneous_space_compaction, uint64_t min_interval_homogeneous_space_compaction_by_oom); ~Heap(); @@ -703,6 +705,9 @@ class Heap { void SetAllocationRecords(AllocRecordObjectMap* records) EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_); + void VisitAllocationRecords(RootVisitor* visitor) const + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -909,6 +914,10 @@ class Heap { void UpdateGcCountRateHistograms() EXCLUSIVE_LOCKS_REQUIRED(gc_complete_lock_); + // GC stress mode attempts to do one GC per unique backtrace. + void CheckGcStressMode(Thread* self, mirror::Object** obj) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // All-known continuous spaces, where objects lie within fixed bounds. std::vector<space::ContinuousSpace*> continuous_spaces_; @@ -1064,6 +1073,7 @@ class Heap { bool verify_pre_gc_rosalloc_; bool verify_pre_sweeping_rosalloc_; bool verify_post_gc_rosalloc_; + const bool gc_stress_mode_; // RAII that temporarily disables the rosalloc verification during // the zygote fork. @@ -1219,6 +1229,14 @@ class Heap { std::unique_ptr<AllocRecordObjectMap> allocation_records_ GUARDED_BY(Locks::alloc_tracker_lock_); + // GC stress related data structures. + Mutex* backtrace_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + // Debugging variables, seen backtraces vs unique backtraces. 
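// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of the patch above): GC stress
// mode forces at most one collection per distinct allocation backtrace by
// hashing the return addresses and remembering the hashes, as CheckGcStressMode()
// does above. A minimal standalone version of that dedup step (same hash
// mixing); names are the editor's own, and locking is omitted -- the runtime
// guards seen_backtraces_ with backtrace_lock_.
#include <cstddef>
#include <cstdint>
#include <unordered_set>

class BacktraceDeduper {
 public:
  // Returns true the first time a given backtrace hash is seen.
  bool IsNewBacktrace(const uintptr_t* frames, size_t frame_count) {
    uint64_t hash = 0;
    for (size_t i = 0; i < frame_count; ++i) {
      hash = hash * 2654435761u + frames[i];  // Knuth-style multiplicative step.
      hash += (hash >> 13) ^ (hash << 6);     // Extra mixing, as in CheckGcStressMode().
    }
    return seen_.insert(hash).second;         // true iff newly inserted.
  }

 private:
  std::unordered_set<uint64_t> seen_;
};
// ---------------------------------------------------------------------------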
+ Atomic<uint64_t> seen_backtrace_count_; + Atomic<uint64_t> unique_backtrace_count_; + // Stack trace hashes that we already saw, + std::unordered_set<uint64_t> seen_backtraces_ GUARDED_BY(backtrace_lock_); + friend class CollectorTransitionTask; friend class collector::GarbageCollector; friend class collector::MarkCompact; diff --git a/runtime/gc_root.h b/runtime/gc_root.h index d6146f348f..bb604f04c5 100644 --- a/runtime/gc_root.h +++ b/runtime/gc_root.h @@ -46,7 +46,9 @@ enum RootType { kRootMonitorUsed, kRootThreadObject, kRootInternedString, + kRootFinalizing, // used for HPROF's conversion to HprofHeapTag kRootDebugger, + kRootReferenceCleanup, // used for HPROF's conversion to HprofHeapTag kRootVMInternal, kRootJNIMonitor, }; diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc index 0f6f788013..0bbc014646 100644 --- a/runtime/interpreter/interpreter_common.cc +++ b/runtime/interpreter/interpreter_common.cc @@ -480,12 +480,28 @@ void AbortTransactionV(Thread* self, const char* fmt, va_list args) { Runtime::Current()->AbortTransactionAndThrowAbortError(self, abort_msg); } +// Separate declaration is required solely for the attributes. +template<bool is_range, bool do_assignability_check> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) +static inline bool DoCallCommon(ArtMethod* called_method, + Thread* self, + ShadowFrame& shadow_frame, + JValue* result, + uint16_t number_of_inputs, + uint32_t arg[Instruction::kMaxVarArgRegs], + uint32_t vregC) ALWAYS_INLINE; + template<bool is_range, bool do_assignability_check> -bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, - const Instruction* inst, uint16_t inst_data, JValue* result) { +static inline bool DoCallCommon(ArtMethod* called_method, + Thread* self, + ShadowFrame& shadow_frame, + JValue* result, + uint16_t number_of_inputs, + uint32_t arg[Instruction::kMaxVarArgRegs], + uint32_t vregC) { bool string_init = false; // Replace calls to String.<init> with equivalent StringFactory call. - if (called_method->GetDeclaringClass()->IsStringClass() && called_method->IsConstructor()) { + if (UNLIKELY(called_method->GetDeclaringClass()->IsStringClass() + && called_method->IsConstructor())) { ScopedObjectAccessUnchecked soa(self); jmethodID mid = soa.EncodeMethod(called_method); called_method = soa.DecodeMethod(WellKnownClasses::StringInitToStringFactoryMethodID(mid)); @@ -494,28 +510,56 @@ bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, // Compute method information. const DexFile::CodeItem* code_item = called_method->GetCodeItem(); - const uint16_t num_ins = (is_range) ? inst->VRegA_3rc(inst_data) : inst->VRegA_35c(inst_data); + + // Number of registers for the callee's call frame. uint16_t num_regs; if (LIKELY(code_item != nullptr)) { num_regs = code_item->registers_size_; - DCHECK_EQ(string_init ? num_ins - 1 : num_ins, code_item->ins_size_); + DCHECK_EQ(string_init ? number_of_inputs - 1 : number_of_inputs, code_item->ins_size_); } else { DCHECK(called_method->IsNative() || called_method->IsProxyMethod()); - num_regs = num_ins; - if (string_init) { - // The new StringFactory call is static and has one fewer argument. - num_regs--; + num_regs = number_of_inputs; + } + + // Hack for String init: + // + // Rewrite invoke-x java.lang.String.<init>(this, a, b, c, ...) into: + // invoke-x StringFactory(a, b, c, ...) + // by effectively dropping the first virtual register from the invoke. 
+ // + // (at this point the ArtMethod has already been replaced, + // so we just need to fix-up the arguments) + uint32_t string_init_vreg_this = is_range ? vregC : arg[0]; + if (UNLIKELY(code_item == nullptr && string_init)) { + DCHECK(called_method->IsNative() || called_method->IsProxyMethod()); + + DCHECK_GT(num_regs, 0u); // As the method is an instance method, there should be at least 1. + // The new StringFactory call is static and has one fewer argument. + num_regs--; + number_of_inputs--; + + // Rewrite the var-args, dropping the 0th argument ("this") + for (uint32_t i = 1; i < Instruction::kMaxVarArgRegs; ++i) { + arg[i - 1] = arg[i]; } + arg[Instruction::kMaxVarArgRegs - 1] = 0; + + // Rewrite the non-var-arg case + vregC++; // Skips the 0th vreg in the range ("this"). } + // Parameter registers go at the end of the shadow frame. + DCHECK_GE(num_regs, number_of_inputs); + size_t first_dest_reg = num_regs - number_of_inputs; + DCHECK_NE(first_dest_reg, (size_t)-1); + // Allocate shadow frame on the stack. - const char* old_cause = self->StartAssertNoThreadSuspension("DoCall"); + const char* old_cause = self->StartAssertNoThreadSuspension("DoCallCommon"); void* memory = alloca(ShadowFrame::ComputeSize(num_regs)); ShadowFrame* new_shadow_frame(ShadowFrame::Create(num_regs, &shadow_frame, called_method, 0, memory)); - // Initialize new shadow frame. - size_t first_dest_reg = num_regs - num_ins; + // Initialize new shadow frame by copying the registers from the callee shadow frame. if (do_assignability_check) { // Slow path. // We might need to do class loading, which incurs a thread state change to kNative. So @@ -530,33 +574,23 @@ bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, uint32_t shorty_len = 0; const char* shorty = new_shadow_frame->GetMethod()->GetShorty(&shorty_len); - // TODO: find a cleaner way to separate non-range and range information without duplicating - // code. - uint32_t arg[5]; // only used in invoke-XXX. - uint32_t vregC; // only used in invoke-XXX-range. - if (is_range) { - vregC = inst->VRegC_3rc(); - } else { - inst->GetVarArgs(arg, inst_data); - } - // Handle receiver apart since it's not part of the shorty. size_t dest_reg = first_dest_reg; size_t arg_offset = 0; + if (!new_shadow_frame->GetMethod()->IsStatic()) { size_t receiver_reg = is_range ? vregC : arg[0]; new_shadow_frame->SetVRegReference(dest_reg, shadow_frame.GetVRegReference(receiver_reg)); ++dest_reg; ++arg_offset; - } else if (string_init) { - // Skip the referrer for the new static StringFactory call. - ++dest_reg; - ++arg_offset; } + + // Copy the caller's invoke-* arguments into the callee's parameter registers. for (uint32_t shorty_pos = 0; dest_reg < num_regs; ++shorty_pos, ++dest_reg, ++arg_offset) { DCHECK_LT(shorty_pos + 1, shorty_len); const size_t src_reg = (is_range) ? vregC + arg_offset : arg[arg_offset]; switch (shorty[shorty_pos + 1]) { + // Handle Object references. 1 virtual register slot. case 'L': { Object* o = shadow_frame.GetVRegReference(src_reg); if (do_assignability_check && o != nullptr) { @@ -581,50 +615,40 @@ bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, new_shadow_frame->SetVRegReference(dest_reg, o); break; } + // Handle doubles and longs. 2 consecutive virtual register slots. 
case 'J': case 'D': { - uint64_t wide_value = (static_cast<uint64_t>(shadow_frame.GetVReg(src_reg + 1)) << 32) | - static_cast<uint32_t>(shadow_frame.GetVReg(src_reg)); + uint64_t wide_value = + (static_cast<uint64_t>(shadow_frame.GetVReg(src_reg + 1)) << BitSizeOf<uint32_t>()) | + static_cast<uint32_t>(shadow_frame.GetVReg(src_reg)); new_shadow_frame->SetVRegLong(dest_reg, wide_value); + // Skip the next virtual register slot since we already used it. ++dest_reg; ++arg_offset; break; } + // Handle all other primitives that are always 1 virtual register slot. default: new_shadow_frame->SetVReg(dest_reg, shadow_frame.GetVReg(src_reg)); break; } } } else { + size_t arg_index = 0; + // Fast path: no extra checks. if (is_range) { - uint16_t first_src_reg = inst->VRegC_3rc(); - if (string_init) { - // Skip the referrer for the new static StringFactory call. - ++first_src_reg; - ++first_dest_reg; - } + // TODO: Implement the range version of invoke-lambda + uint16_t first_src_reg = vregC; + for (size_t src_reg = first_src_reg, dest_reg = first_dest_reg; dest_reg < num_regs; ++dest_reg, ++src_reg) { AssignRegister(new_shadow_frame, shadow_frame, dest_reg, src_reg); } } else { - DCHECK_LE(num_ins, 5U); - uint16_t regList = inst->Fetch16(2); - uint16_t count = num_ins; - size_t arg_index = 0; - if (count == 5) { - AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + 4U, - (inst_data >> 8) & 0x0f); - --count; - } - if (string_init) { - // Skip the referrer for the new static StringFactory call. - regList >>= 4; - ++first_dest_reg; - --count; - } - for (; arg_index < count; ++arg_index, regList >>= 4) { - AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, regList & 0x0f); + DCHECK_LE(number_of_inputs, Instruction::kMaxVarArgRegs); + + for (; arg_index < number_of_inputs; ++arg_index) { + AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, arg[arg_index]); } } self->EndAssertNoThreadSuspension(old_cause); @@ -660,8 +684,7 @@ bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, if (string_init && !self->IsExceptionPending()) { // Set the new string result of the StringFactory. - uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c(); - shadow_frame.SetVRegReference(vregC, result->GetL()); + shadow_frame.SetVRegReference(string_init_vreg_this, result->GetL()); // Overwrite all potential copies of the original result of the new-instance of string with the // new result of the StringFactory. Use the verifier to find this set of registers. ArtMethod* method = shadow_frame.GetMethod(); @@ -692,6 +715,56 @@ bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, return !self->IsExceptionPending(); } +template<bool is_range, bool do_assignability_check> +bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, + const Instruction* inst, uint16_t inst_data, JValue* result) { + const uint4_t num_additional_registers = inst->VRegB_25x(); + // Argument word count. + const uint16_t number_of_inputs = num_additional_registers + 1; + // The first input register is always present and is not encoded in the count. + + // TODO: find a cleaner way to separate non-range and range information without duplicating + // code. + uint32_t arg[Instruction::kMaxVarArgRegs]; // only used in invoke-XXX. + uint32_t vregC = 0; // only used in invoke-XXX-range. 
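// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of the patch above): a
// standalone decoder for the experimental 25x layout used by invoke-lambda,
// mirroring GetAllArgs25x(). Given the two 16-bit code units it extracts the
// register count (vB) and up to five 4-bit register numbers {vC..vG}; the
// struct and function names are the editor's own.
#include <cstdint>

struct Decoded25x {
  uint8_t count;    // vB: number of *extra* registers beyond vC (0..4).
  uint8_t regs[5];  // regs[0] = vC, regs[1..4] = vD..vG (meaningful up to count).
};

Decoded25x Decode25x(uint16_t unit0, uint16_t unit1) {
  Decoded25x d;
  d.count   = (unit0 >> 12) & 0x0f;  // vB
  d.regs[0] =  unit1        & 0x0f;  // vC
  d.regs[1] = (unit1 >>  4) & 0x0f;  // vD
  d.regs[2] = (unit1 >>  8) & 0x0f;  // vE
  d.regs[3] = (unit1 >> 12) & 0x0f;  // vF
  d.regs[4] = (unit0 >>  8) & 0x0f;  // vG
  return d;
}
// For "invoke-lambda vC, {vD, vE, vF, vG}" the callee therefore sees
// count + 1 inputs, which is how DoLambdaCall above derives number_of_inputs.
// ---------------------------------------------------------------------------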
+ if (is_range) { + vregC = inst->VRegC_3rc(); + } else { + // TODO(iam): See if it's possible to remove inst_data dependency from 35x to avoid this path + UNUSED(inst_data); + inst->GetAllArgs25x(arg); + } + + // TODO: if there's an assignability check, throw instead? + DCHECK(called_method->IsStatic()); + + return DoCallCommon<is_range, do_assignability_check>( + called_method, self, shadow_frame, + result, number_of_inputs, arg, vregC); +} + +template<bool is_range, bool do_assignability_check> +bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, + const Instruction* inst, uint16_t inst_data, JValue* result) { + // Argument word count. + const uint16_t number_of_inputs = (is_range) ? inst->VRegA_3rc(inst_data) : inst->VRegA_35c(inst_data); + + // TODO: find a cleaner way to separate non-range and range information without duplicating + // code. + uint32_t arg[Instruction::kMaxVarArgRegs]; // only used in invoke-XXX. + uint32_t vregC = 0; + if (is_range) { + vregC = inst->VRegC_3rc(); + } else { + vregC = inst->VRegC_35c(); + inst->GetVarArgs(arg, inst_data); + } + + return DoCallCommon<is_range, do_assignability_check>( + called_method, self, shadow_frame, + result, number_of_inputs, arg, vregC); +} + template <bool is_range, bool do_access_check, bool transaction_active> bool DoFilledNewArray(const Instruction* inst, const ShadowFrame& shadow_frame, Thread* self, JValue* result) { @@ -707,50 +780,52 @@ bool DoFilledNewArray(const Instruction* inst, const ShadowFrame& shadow_frame, return false; } uint16_t type_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c(); - Class* arrayClass = ResolveVerifyAndClinit(type_idx, shadow_frame.GetMethod(), - self, false, do_access_check); - if (UNLIKELY(arrayClass == nullptr)) { + Class* array_class = ResolveVerifyAndClinit(type_idx, shadow_frame.GetMethod(), + self, false, do_access_check); + if (UNLIKELY(array_class == nullptr)) { DCHECK(self->IsExceptionPending()); return false; } - CHECK(arrayClass->IsArrayClass()); - Class* componentClass = arrayClass->GetComponentType(); - if (UNLIKELY(componentClass->IsPrimitive() && !componentClass->IsPrimitiveInt())) { - if (componentClass->IsPrimitiveLong() || componentClass->IsPrimitiveDouble()) { + CHECK(array_class->IsArrayClass()); + Class* component_class = array_class->GetComponentType(); + const bool is_primitive_int_component = component_class->IsPrimitiveInt(); + if (UNLIKELY(component_class->IsPrimitive() && !is_primitive_int_component)) { + if (component_class->IsPrimitiveLong() || component_class->IsPrimitiveDouble()) { ThrowRuntimeException("Bad filled array request for type %s", - PrettyDescriptor(componentClass).c_str()); + PrettyDescriptor(component_class).c_str()); } else { self->ThrowNewExceptionF("Ljava/lang/InternalError;", "Found type %s; filled-new-array not implemented for anything but 'int'", - PrettyDescriptor(componentClass).c_str()); + PrettyDescriptor(component_class).c_str()); } return false; } - Object* newArray = Array::Alloc<true>(self, arrayClass, length, - arrayClass->GetComponentSizeShift(), - Runtime::Current()->GetHeap()->GetCurrentAllocator()); - if (UNLIKELY(newArray == nullptr)) { - DCHECK(self->IsExceptionPending()); + Object* new_array = Array::Alloc<true>(self, array_class, length, + array_class->GetComponentSizeShift(), + Runtime::Current()->GetHeap()->GetCurrentAllocator()); + if (UNLIKELY(new_array == nullptr)) { + self->AssertPendingOOMException(); return false; } - uint32_t arg[5]; // only used in filled-new-array. 
- uint32_t vregC; // only used in filled-new-array-range. + uint32_t arg[Instruction::kMaxVarArgRegs]; // only used in filled-new-array. + uint32_t vregC = 0; // only used in filled-new-array-range. if (is_range) { vregC = inst->VRegC_3rc(); } else { inst->GetVarArgs(arg); } - const bool is_primitive_int_component = componentClass->IsPrimitiveInt(); for (int32_t i = 0; i < length; ++i) { size_t src_reg = is_range ? vregC + i : arg[i]; if (is_primitive_int_component) { - newArray->AsIntArray()->SetWithoutChecks<transaction_active>(i, shadow_frame.GetVReg(src_reg)); + new_array->AsIntArray()->SetWithoutChecks<transaction_active>( + i, shadow_frame.GetVReg(src_reg)); } else { - newArray->AsObjectArray<Object>()->SetWithoutChecks<transaction_active>(i, shadow_frame.GetVRegReference(src_reg)); + new_array->AsObjectArray<Object>()->SetWithoutChecks<transaction_active>( + i, shadow_frame.GetVRegReference(src_reg)); } } - result->SetL(newArray); + result->SetL(new_array); return true; } @@ -815,6 +890,20 @@ EXPLICIT_DO_CALL_TEMPLATE_DECL(true, false); EXPLICIT_DO_CALL_TEMPLATE_DECL(true, true); #undef EXPLICIT_DO_CALL_TEMPLATE_DECL +// Explicit DoLambdaCall template function declarations. +#define EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(_is_range, _do_assignability_check) \ + template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) \ + bool DoLambdaCall<_is_range, _do_assignability_check>(ArtMethod* method, Thread* self, \ + ShadowFrame& shadow_frame, \ + const Instruction* inst, \ + uint16_t inst_data, \ + JValue* result) +EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(false, false); +EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(false, true); +EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(true, false); +EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(true, true); +#undef EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL + // Explicit DoFilledNewArray template function declarations. #define EXPLICIT_DO_FILLED_NEW_ARRAY_TEMPLATE_DECL(_is_range_, _check, _transaction_active) \ template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) \ diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h index 0124d90372..a12a58d48f 100644 --- a/runtime/interpreter/interpreter_common.h +++ b/runtime/interpreter/interpreter_common.h @@ -35,6 +35,7 @@ #include "entrypoints/entrypoint_utils-inl.h" #include "handle_scope-inl.h" #include "mirror/class-inl.h" +#include "mirror/method.h" #include "mirror/object-inl.h" #include "mirror/object_array-inl.h" #include "mirror/string-inl.h" @@ -97,6 +98,151 @@ template<bool is_range, bool do_assignability_check> bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data, JValue* result); +// Invokes the given lambda closure. This is part of the invocation support and is used by +// DoLambdaInvoke functions. +// Returns true on success, otherwise throws an exception and returns false. +template<bool is_range, bool do_assignability_check> +bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, + const Instruction* inst, uint16_t inst_data, JValue* result); + +// Validates that the art method corresponding to a lambda method target +// is semantically valid: +// +// Must be ACC_STATIC and ACC_LAMBDA. Must be a concrete managed implementation +// (i.e. not native, not proxy, not abstract, ...). +// +// If the validation fails, return false and raise an exception. 
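//
// As a rough sketch of where the flag checks are headed ("kAccLambda" is hypothetical here,
// assuming a dedicated lambda access flag lands together with these opcodes):
//
//   uint32_t flags = called_method->GetAccessFlags();
//   if ((flags & kAccStatic) == 0 || (flags & kAccLambda) == 0) {
//     // Raise an appropriate error; the exact exception type is still an open TODO below.
//     return false;
//   }
//
// The body below currently only rejects null, abstract, and code-item-less methods and
// leaves those flag tests as TODOs.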
+static inline bool IsValidLambdaTargetOrThrow(ArtMethod* called_method) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + bool success = false; + + if (UNLIKELY(called_method == nullptr)) { + // The shadow frame should already be pushed, so we don't need to update it. + } else if (UNLIKELY(called_method->IsAbstract())) { + ThrowAbstractMethodError(called_method); + // TODO(iam): Also handle the case when the method is non-static, what error do we throw? + // TODO(iam): Also make sure that ACC_LAMBDA is set. + } else if (UNLIKELY(called_method->GetCodeItem() == nullptr)) { + // Method could be native, proxy method, etc. Lambda targets have to be concrete impls, + // so don't allow this. + } else { + success = true; + } + + return success; +} + +// Write out the 'ArtMethod*' into vreg and vreg+1 +static inline void WriteLambdaClosureIntoVRegs(ShadowFrame& shadow_frame, + const ArtMethod& called_method, + uint32_t vreg) { + // Split the method into a lo and hi 32 bits so we can encode them into 2 virtual registers. + uint32_t called_method_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(&called_method)); + uint32_t called_method_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(&called_method) + >> BitSizeOf<uint32_t>()); + // Use uint64_t instead of uintptr_t to allow shifting past the max on 32-bit. + static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible"); + + DCHECK_NE(called_method_lo | called_method_hi, 0u); + + shadow_frame.SetVReg(vreg, called_method_lo); + shadow_frame.SetVReg(vreg + 1, called_method_hi); +} + +// Handles create-lambda instructions. +// Returns true on success, otherwise throws an exception and returns false. +// (Exceptions are thrown by creating a new exception and then being put in the thread TLS) +// +// As a work-in-progress implementation, this shoves the ArtMethod object corresponding +// to the target dex method index into the target register vA and vA + 1. +template<bool do_access_check> +static inline bool DoCreateLambda(Thread* self, ShadowFrame& shadow_frame, + const Instruction* inst) { + /* + * create-lambda is opcode 0x21c + * - vA is the target register where the closure will be stored into + * (also stores into vA + 1) + * - vB is the method index which will be the target for a later invoke-lambda + */ + const uint32_t method_idx = inst->VRegB_21c(); + mirror::Object* receiver = nullptr; // Always static. (see 'kStatic') + ArtMethod* sf_method = shadow_frame.GetMethod(); + ArtMethod* const called_method = FindMethodFromCode<kStatic, do_access_check>( + method_idx, &receiver, &sf_method, self); + + uint32_t vregA = inst->VRegA_21c(); + + if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) { + CHECK(self->IsExceptionPending()); + shadow_frame.SetVReg(vregA, 0u); + shadow_frame.SetVReg(vregA + 1, 0u); + return false; + } + + WriteLambdaClosureIntoVRegs(shadow_frame, *called_method, vregA); + return true; +} + +// Reads out the 'ArtMethod*' stored inside of vreg and vreg+1 +// +// Validates that the art method points to a valid lambda function, otherwise throws +// an exception and returns null. +// (Exceptions are thrown by creating a new exception and then being put in the thread TLS) +static inline ArtMethod* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_frame, + uint32_t vreg) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + // TODO(iam): Introduce a closure abstraction that will contain the captured variables + // instead of just an ArtMethod. 
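  // (The layout mirrors WriteLambdaClosureIntoVRegs above: vreg holds the low 32 bits of the
  //  ArtMethod* and vreg + 1 holds the high 32 bits.)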
+ // This is temporarily using 2 vregs because a native ArtMethod can be up to 64-bit, + // but once proper variable capture is implemented it will only use 1 vreg. + uint32_t vc_value_lo = shadow_frame.GetVReg(vreg); + uint32_t vc_value_hi = shadow_frame.GetVReg(vreg + 1); + + uint64_t vc_value_ptr = (static_cast<uint64_t>(vc_value_hi) << BitSizeOf<uint32_t>()) + | vc_value_lo; + + // Use uint64_t instead of uintptr_t to allow left-shifting past the max on 32-bit. + static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible"); + ArtMethod* const called_method = reinterpret_cast<ArtMethod* const>(vc_value_ptr); + + // Guard against the user passing a null closure, which is odd but (sadly) semantically valid. + if (UNLIKELY(called_method == nullptr)) { + ThrowNullPointerExceptionFromInterpreter(); + return nullptr; + } else if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) { + return nullptr; + } + + return called_method; +} + +template<bool do_access_check> +static inline bool DoInvokeLambda(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, + uint16_t inst_data, JValue* result) { + /* + * invoke-lambda is opcode 0x25 + * + * - vC is the closure register (both vC and vC + 1 will be used to store the closure). + * - vB is the number of additional registers up to |{vD,vE,vF,vG}| (4) + * - the rest of the registers are always var-args + * + * - reading var-args for 0x25 gets us vD,vE,vF,vG (but not vB) + */ + uint32_t vC = inst->VRegC_25x(); + ArtMethod* const called_method = ReadLambdaClosureFromVRegsOrThrow(shadow_frame, vC); + + // Failed lambda target runtime check, an exception was raised. + if (UNLIKELY(called_method == nullptr)) { + CHECK(self->IsExceptionPending()); + result->SetJ(0); + return false; + } + + // Invoke a non-range lambda + return DoLambdaCall<false, do_access_check>(called_method, self, shadow_frame, inst, inst_data, + result); +} + // Handles invoke-XXX/range instructions. // Returns true on success, otherwise throws an exception and returns false. template<InvokeType type, bool is_range, bool do_access_check> @@ -348,6 +494,89 @@ static inline int32_t DoSparseSwitch(const Instruction* inst, const ShadowFrame& return 3; } +template <bool _do_check> +static inline bool DoBoxLambda(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, + uint16_t inst_data) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + /* + * box-lambda vA, vB /// opcode 0xf8, format 22x + * - vA is the target register where the Object representation of the closure will be stored into + * - vB is a closure (made by create-lambda) + * (also reads vB + 1) + */ + uint32_t vreg_target_object = inst->VRegA_22x(inst_data); + uint32_t vreg_source_closure = inst->VRegB_22x(); + + ArtMethod* const closure_method = ReadLambdaClosureFromVRegsOrThrow(shadow_frame, + vreg_source_closure); + + // Failed lambda target runtime check, an exception was raised. + if (UNLIKELY(closure_method == nullptr)) { + CHECK(self->IsExceptionPending()); + return false; + } + + // Convert the ArtMethod into a java.lang.reflect.Method which will serve + // as the temporary 'boxed' version of the lambda. This is good enough + // to check all the basic object identities that a boxed lambda must retain. + + // TODO: Boxing an innate lambda (i.e. made with create-lambda) should make a proxy class + // TODO: Boxing a learned lambda (i.e. 
made with unbox-lambda) should return the original object + // TODO: Repeated boxing should return the same object reference + mirror::Method* method_as_object = + mirror::Method::CreateFromArtMethod(self, closure_method); + + if (UNLIKELY(method_as_object == nullptr)) { + // Most likely an OOM has occurred. + CHECK(self->IsExceptionPending()); + return false; + } + + shadow_frame.SetVRegReference(vreg_target_object, method_as_object); + return true; +} + +template <bool _do_check> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) +static inline bool DoUnboxLambda(Thread* self ATTRIBUTE_UNUSED, + ShadowFrame& shadow_frame, + const Instruction* inst, + uint16_t inst_data) { + /* + * unbox-lambda vA, vB, [type id] /// opcode 0xf9, format 22c + * - vA is the target register where the closure will be written into + * (also writes vA + 1) + * - vB is the Object representation of the closure (made by box-lambda) + */ + uint32_t vreg_target_closure = inst->VRegA_22c(inst_data); + uint32_t vreg_source_object = inst->VRegB_22c(); + + // Raise NullPointerException if object is null + mirror::Object* boxed_closure_object = shadow_frame.GetVRegReference(vreg_source_object); + if (UNLIKELY(boxed_closure_object == nullptr)) { + ThrowNullPointerExceptionFromInterpreter(); + return false; + } + + // Raise ClassCastException if object is not instanceof java.lang.reflect.Method + if (UNLIKELY(!boxed_closure_object->InstanceOf(mirror::Method::StaticClass()))) { + ThrowClassCastException(mirror::Method::StaticClass(), boxed_closure_object->GetClass()); + return false; + } + + // TODO(iam): We must check that the closure object extends/implements the type + // specified in [type id]. This is not currently implemented since it's always a Method. + + // If we got this far, the inputs are valid. + // Write out the java.lang.reflect.Method's embedded ArtMethod* into the vreg target. + mirror::AbstractMethod* boxed_closure_as_method = + down_cast<mirror::AbstractMethod*>(boxed_closure_object); + + ArtMethod* unboxed_closure = boxed_closure_as_method->GetArtMethod(); + DCHECK(unboxed_closure != nullptr); + + WriteLambdaClosureIntoVRegs(shadow_frame, *unboxed_closure, vreg_target_closure); + return true; +} + uint32_t FindNextInstructionFollowingException(Thread* self, ShadowFrame& shadow_frame, uint32_t dex_pc, const instrumentation::Instrumentation* instrumentation) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -420,6 +649,46 @@ EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(false); // invoke-virtual-quick. EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(true); // invoke-virtual-quick-range. #undef EXPLICIT_INSTANTIATION_DO_INVOKE_VIRTUAL_QUICK +// Explicitly instantiate all DoCreateLambda functions. +#define EXPLICIT_DO_CREATE_LAMBDA_DECL(_do_check) \ +template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) \ +bool DoCreateLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, \ + const Instruction* inst) + +EXPLICIT_DO_CREATE_LAMBDA_DECL(false); // create-lambda +EXPLICIT_DO_CREATE_LAMBDA_DECL(true); // create-lambda +#undef EXPLICIT_DO_CREATE_LAMBDA_DECL + +// Explicitly instantiate all DoInvokeLambda functions. 
+#define EXPLICIT_DO_INVOKE_LAMBDA_DECL(_do_check) \ +template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) \ +bool DoInvokeLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \ + uint16_t inst_data, JValue* result); + +EXPLICIT_DO_INVOKE_LAMBDA_DECL(false); // invoke-lambda +EXPLICIT_DO_INVOKE_LAMBDA_DECL(true); // invoke-lambda +#undef EXPLICIT_DO_INVOKE_LAMBDA_DECL + +// Explicitly instantiate all DoBoxLambda functions. +#define EXPLICIT_DO_BOX_LAMBDA_DECL(_do_check) \ +template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) \ +bool DoBoxLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \ + uint16_t inst_data); + +EXPLICIT_DO_BOX_LAMBDA_DECL(false); // box-lambda +EXPLICIT_DO_BOX_LAMBDA_DECL(true); // box-lambda +#undef EXPLICIT_DO_BOX_LAMBDA_DECL + +// Explicitly instantiate all DoUnBoxLambda functions. +#define EXPLICIT_DO_UNBOX_LAMBDA_DECL(_do_check) \ +template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) \ +bool DoUnboxLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \ + uint16_t inst_data); + +EXPLICIT_DO_UNBOX_LAMBDA_DECL(false); // unbox-lambda +EXPLICIT_DO_UNBOX_LAMBDA_DECL(true); // unbox-lambda +#undef EXPLICIT_DO_BOX_LAMBDA_DECL + } // namespace interpreter } // namespace art diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc index 86027c542f..ec923b6eb2 100644 --- a/runtime/interpreter/interpreter_goto_table_impl.cc +++ b/runtime/interpreter/interpreter_goto_table_impl.cc @@ -75,6 +75,17 @@ namespace interpreter { #define HANDLE_INSTRUCTION_START(opcode) op_##opcode: // NOLINT(whitespace/labels) #define HANDLE_INSTRUCTION_END() UNREACHABLE_CODE_CHECK() +// Use with instructions labeled with kExperimental flag: +#define HANDLE_EXPERIMENTAL_INSTRUCTION_START(opcode) \ + HANDLE_INSTRUCTION_START(opcode); \ + DCHECK(inst->IsExperimental()); \ + if (Runtime::Current()->AreExperimentalLambdasEnabled()) { +#define HANDLE_EXPERIMENTAL_INSTRUCTION_END() \ + } else { \ + UnexpectedOpcode(inst, shadow_frame); \ + } HANDLE_INSTRUCTION_END(); + + /** * Interpreter based on computed goto tables. 
* @@ -1609,6 +1620,14 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF } HANDLE_INSTRUCTION_END(); + HANDLE_EXPERIMENTAL_INSTRUCTION_START(INVOKE_LAMBDA) { + bool success = DoInvokeLambda<do_access_check>(self, shadow_frame, inst, inst_data, + &result_register); + UPDATE_HANDLER_TABLE(); + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2); + } + HANDLE_EXPERIMENTAL_INSTRUCTION_END(); + HANDLE_INSTRUCTION_START(NEG_INT) shadow_frame.SetVReg( inst->VRegA_12x(inst_data), -shadow_frame.GetVReg(inst->VRegB_12x(inst_data))); @@ -2390,6 +2409,24 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF ADVANCE(2); HANDLE_INSTRUCTION_END(); + HANDLE_EXPERIMENTAL_INSTRUCTION_START(CREATE_LAMBDA) { + bool success = DoCreateLambda<true>(self, shadow_frame, inst); + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2); + } + HANDLE_EXPERIMENTAL_INSTRUCTION_END(); + + HANDLE_EXPERIMENTAL_INSTRUCTION_START(BOX_LAMBDA) { + bool success = DoBoxLambda<do_access_check>(self, shadow_frame, inst, inst_data); + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2); + } + HANDLE_EXPERIMENTAL_INSTRUCTION_END(); + + HANDLE_EXPERIMENTAL_INSTRUCTION_START(UNBOX_LAMBDA) { + bool success = DoUnboxLambda<do_access_check>(self, shadow_frame, inst, inst_data); + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2); + } + HANDLE_EXPERIMENTAL_INSTRUCTION_END(); + HANDLE_INSTRUCTION_START(UNUSED_3E) UnexpectedOpcode(inst, shadow_frame); HANDLE_INSTRUCTION_END(); @@ -2422,10 +2459,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF UnexpectedOpcode(inst, shadow_frame); HANDLE_INSTRUCTION_END(); - HANDLE_INSTRUCTION_START(UNUSED_F3) - UnexpectedOpcode(inst, shadow_frame); - HANDLE_INSTRUCTION_END(); - HANDLE_INSTRUCTION_START(UNUSED_F4) UnexpectedOpcode(inst, shadow_frame); HANDLE_INSTRUCTION_END(); @@ -2434,22 +2467,10 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF UnexpectedOpcode(inst, shadow_frame); HANDLE_INSTRUCTION_END(); - HANDLE_INSTRUCTION_START(UNUSED_F6) - UnexpectedOpcode(inst, shadow_frame); - HANDLE_INSTRUCTION_END(); - HANDLE_INSTRUCTION_START(UNUSED_F7) UnexpectedOpcode(inst, shadow_frame); HANDLE_INSTRUCTION_END(); - HANDLE_INSTRUCTION_START(UNUSED_F8) - UnexpectedOpcode(inst, shadow_frame); - HANDLE_INSTRUCTION_END(); - - HANDLE_INSTRUCTION_START(UNUSED_F9) - UnexpectedOpcode(inst, shadow_frame); - HANDLE_INSTRUCTION_END(); - HANDLE_INSTRUCTION_START(UNUSED_FA) UnexpectedOpcode(inst, shadow_frame); HANDLE_INSTRUCTION_END(); diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index fcf083cbe1..78090bbe0c 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -53,6 +53,11 @@ namespace interpreter { } \ } while (false) +static bool IsExperimentalInstructionEnabled(const Instruction *inst) { + DCHECK(inst->IsExperimental()); + return Runtime::Current()->AreExperimentalLambdasEnabled(); +} + template<bool do_access_check, bool transaction_active> JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame, JValue result_register) { @@ -2217,8 +2222,59 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, (inst->VRegC_22b() & 0x1f)); inst = inst->Next_2xx(); break; + case Instruction::INVOKE_LAMBDA: { + if (!IsExperimentalInstructionEnabled(inst)) { + UnexpectedOpcode(inst, shadow_frame); + } + + PREAMBLE(); + bool success = 
DoInvokeLambda<do_access_check>(self, shadow_frame, inst, inst_data, + &result_register); + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx); + break; + } + case Instruction::CREATE_LAMBDA: { + if (!IsExperimentalInstructionEnabled(inst)) { + UnexpectedOpcode(inst, shadow_frame); + } + + PREAMBLE(); + bool success = DoCreateLambda<do_access_check>(self, shadow_frame, inst); + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx); + break; + } + case Instruction::UNUSED_F4: + case Instruction::UNUSED_F5: + case Instruction::UNUSED_F7: { + if (!IsExperimentalInstructionEnabled(inst)) { + UnexpectedOpcode(inst, shadow_frame); + } + + CHECK(false); // TODO(iam): Implement opcodes for lambdas + break; + } + case Instruction::BOX_LAMBDA: { + if (!IsExperimentalInstructionEnabled(inst)) { + UnexpectedOpcode(inst, shadow_frame); + } + + PREAMBLE(); + bool success = DoBoxLambda<do_access_check>(self, shadow_frame, inst, inst_data); + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx); + break; + } + case Instruction::UNBOX_LAMBDA: { + if (!IsExperimentalInstructionEnabled(inst)) { + UnexpectedOpcode(inst, shadow_frame); + } + + PREAMBLE(); + bool success = DoUnboxLambda<do_access_check>(self, shadow_frame, inst, inst_data); + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx); + break; + } case Instruction::UNUSED_3E ... Instruction::UNUSED_43: - case Instruction::UNUSED_F3 ... Instruction::UNUSED_FF: + case Instruction::UNUSED_FA ... Instruction::UNUSED_FF: case Instruction::UNUSED_79: case Instruction::UNUSED_7A: UnexpectedOpcode(inst, shadow_frame); diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h index 122cc6b738..7f89b1d4df 100644 --- a/runtime/mirror/class-inl.h +++ b/runtime/mirror/class-inl.h @@ -759,7 +759,7 @@ inline bool Class::GetSlowPathEnabled() { } inline void Class::SetSlowPath(bool enabled) { - SetFieldBoolean<false>(GetSlowPathFlagOffset(), enabled); + SetFieldBoolean<false, false>(GetSlowPathFlagOffset(), enabled); } inline void Class::InitializeClassVisitor::operator()( diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h index 551e7e290a..ba0a9fc4e5 100644 --- a/runtime/mirror/class.h +++ b/runtime/mirror/class.h @@ -1030,10 +1030,14 @@ class MANAGED Class FINAL : public Object { } static Class* GetJavaLangClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - DCHECK(!java_lang_Class_.IsNull()); + DCHECK(HasJavaLangClass()); return java_lang_Class_.Read(); } + static bool HasJavaLangClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return !java_lang_Class_.IsNull(); + } + // Can't call this SetClass or else gets called instead of Object::SetClass in places. 
static void SetClassClass(Class* java_lang_Class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); static void ResetClass(); diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h index 8d9c08d9d5..b689057426 100644 --- a/runtime/mirror/string-inl.h +++ b/runtime/mirror/string-inl.h @@ -162,8 +162,8 @@ inline String* String::Alloc(Thread* self, int32_t utf16_length, gc::AllocatorTy } gc::Heap* heap = Runtime::Current()->GetHeap(); return down_cast<String*>( - heap->AllocObjectWithAllocator<kIsInstrumented, false>(self, string_class, size, - allocator_type, pre_fence_visitor)); + heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, string_class, size, + allocator_type, pre_fence_visitor)); } template <bool kIsInstrumented> diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc index 4b563b563b..5e84df5d0c 100644 --- a/runtime/parsed_options.cc +++ b/runtime/parsed_options.cc @@ -260,6 +260,10 @@ std::unique_ptr<RuntimeParser> ParsedOptions::MakeParser(bool ignore_unrecognize .Define("--cpu-abilist=_") .WithType<std::string>() .IntoKey(M::CpuAbiList) + .Define({"-Xexperimental-lambdas", "-Xnoexperimental-lambdas"}) + .WithType<bool>() + .WithValues({true, false}) + .IntoKey(M::ExperimentalLambdas) .Ignore({ "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa", "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_", @@ -544,6 +548,12 @@ bool ParsedOptions::Parse(const RuntimeOptions& options, bool ignore_unrecognize args.Set(M::HeapGrowthLimit, args.GetOrDefault(M::MemoryMaximumSize)); } + if (args.GetOrDefault(M::ExperimentalLambdas)) { + LOG(WARNING) << "Experimental lambdas have been enabled. All lambda opcodes have " + << "an unstable specification and are nearly guaranteed to change over time. " + << "Do not attempt to write shipping code against these opcodes."; + } + *runtime_options = std::move(args); return true; } @@ -663,6 +673,8 @@ void ParsedOptions::Usage(const char* fmt, ...) 
{ UsageMessage(stream, " -X[no]image-dex2oat (Whether to create and use a boot image)\n"); UsageMessage(stream, " -Xno-dex-file-fallback " "(Don't fall back to dex files without oat files)\n"); + UsageMessage(stream, " -X[no]experimental-lambdas\n" + " (Enable new experimental dalvik opcodes, off by default)\n"); UsageMessage(stream, "\n"); UsageMessage(stream, "The following previously supported Dalvik options are ignored:\n"); diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 66ec7ccf7a..d0f01b3750 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -144,7 +144,10 @@ struct TraceConfig { }; Runtime::Runtime() - : instruction_set_(kNone), + : resolution_method_(nullptr), + imt_conflict_method_(nullptr), + imt_unimplemented_method_(nullptr), + instruction_set_(kNone), compiler_callbacks_(nullptr), is_zygote_(false), must_relocate_(false), @@ -189,7 +192,8 @@ Runtime::Runtime() implicit_so_checks_(false), implicit_suspend_checks_(false), is_native_bridge_loaded_(false), - zygote_max_failed_boots_(0) { + zygote_max_failed_boots_(0), + experimental_lambdas_(false) { CheckAsmSupportOffsetsAndSizes(); std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u); } @@ -841,6 +845,7 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) } zygote_max_failed_boots_ = runtime_options.GetOrDefault(Opt::ZygoteMaxFailedBoots); + experimental_lambdas_ = runtime_options.GetOrDefault(Opt::ExperimentalLambdas); XGcOption xgc_option = runtime_options.GetOrDefault(Opt::GcOption); ATRACE_BEGIN("CreateHeap"); @@ -871,6 +876,7 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) xgc_option.verify_pre_gc_rosalloc_, xgc_option.verify_pre_sweeping_rosalloc_, xgc_option.verify_post_gc_rosalloc_, + xgc_option.gcstress_, runtime_options.GetOrDefault(Opt::EnableHSpaceCompactForOOM), runtime_options.GetOrDefault(Opt::HSpaceCompactForOOMMinIntervalsMs)); ATRACE_END(); @@ -1376,6 +1382,7 @@ void Runtime::VisitConstantRoots(RootVisitor* visitor) { void Runtime::VisitConcurrentRoots(RootVisitor* visitor, VisitRootFlags flags) { intern_table_->VisitRoots(visitor, flags); class_linker_->VisitRoots(visitor, flags); + heap_->VisitAllocationRecords(visitor); if ((flags & kVisitRootFlagNewRoots) == 0) { // Guaranteed to have no new roots in the constant roots. VisitConstantRoots(visitor); diff --git a/runtime/runtime.h b/runtime/runtime.h index e569333bf0..3cd7404dda 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -527,6 +527,10 @@ class Runtime { return zygote_max_failed_boots_; } + bool AreExperimentalLambdasEnabled() const { + return experimental_lambdas_; + } + // Create the JIT and instrumentation and code cache. void CreateJit(); @@ -727,6 +731,12 @@ class Runtime { // zygote. uint32_t zygote_max_failed_boots_; + // Enable experimental opcodes that aren't fully specified yet. The intent is to + // eventually publish them as public-usable opcodes, but they aren't ready yet. + // + // Experimental opcodes should not be used by other production code. 
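  // Set from the -X[no]experimental-lambdas flag (see ParsedOptions earlier in this patch)
  // and read by the interpreters through AreExperimentalLambdasEnabled(); a minimal guard at
  // an experimental-opcode site looks like the check used in interpreter_switch_impl.cc:
  //
  //   if (!Runtime::Current()->AreExperimentalLambdasEnabled()) {
  //     UnexpectedOpcode(inst, shadow_frame);
  //   }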
+ bool experimental_lambdas_; + MethodRefToStringInitRegMap method_ref_string_init_reg_map_; DISALLOW_COPY_AND_ASSIGN(Runtime); diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def index 4a307d5ed6..fc527b55ea 100644 --- a/runtime/runtime_options.def +++ b/runtime/runtime_options.def @@ -110,6 +110,7 @@ RUNTIME_OPTIONS_KEY (std::string, NativeBridge) RUNTIME_OPTIONS_KEY (unsigned int, ZygoteMaxFailedBoots, 10) RUNTIME_OPTIONS_KEY (Unit, NoDexFileFallback) RUNTIME_OPTIONS_KEY (std::string, CpuAbiList) +RUNTIME_OPTIONS_KEY (bool, ExperimentalLambdas, false) // -X[no]experimental-lambdas // Not parse-able from command line, but can be provided explicitly. // (Do not add anything here that is defined in ParsedOptions::MakeParser) diff --git a/runtime/stride_iterator.h b/runtime/stride_iterator.h index a68030287b..d8d21aa5af 100644 --- a/runtime/stride_iterator.h +++ b/runtime/stride_iterator.h @@ -22,7 +22,7 @@ namespace art { template<typename T> -class StrideIterator : public std::iterator<std::random_access_iterator_tag, T> { +class StrideIterator : public std::iterator<std::forward_iterator_tag, T> { public: StrideIterator(const StrideIterator&) = default; StrideIterator(StrideIterator&&) = default; diff --git a/runtime/thread.cc b/runtime/thread.cc index 348d68586a..37a86f1218 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -2362,8 +2362,7 @@ class ReferenceMapVisitor : public StackVisitor { // Can't be null or how would we compile its instructions? DCHECK(code_item != nullptr) << PrettyMethod(m); NativePcOffsetToReferenceMap map(native_gc_map); - size_t num_regs = std::min(map.RegWidth() * 8, - static_cast<size_t>(code_item->registers_size_)); + size_t num_regs = map.RegWidth() * 8; if (num_regs > 0) { Runtime* runtime = Runtime::Current(); const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m, sizeof(void*)); diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index b86a7ee966..ddd1caa69e 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -172,6 +172,15 @@ MethodVerifier::FailureKind MethodVerifier::VerifyClass(Thread* self, bool allow_soft_failures, std::string* error) { DCHECK(class_def != nullptr); + + // A class must not be abstract and final. + if ((class_def->access_flags_ & (kAccAbstract | kAccFinal)) == (kAccAbstract | kAccFinal)) { + *error = "Verifier rejected class "; + *error += PrettyDescriptor(dex_file->GetClassDescriptor(*class_def)); + *error += ": class is abstract and final."; + return kHardFailure; + } + const uint8_t* class_data = dex_file->GetClassData(*class_def); if (class_data == nullptr) { // empty class, probably a marker interface @@ -572,6 +581,7 @@ std::ostream& MethodVerifier::Fail(VerifyError error) { case VERIFY_ERROR_ACCESS_METHOD: case VERIFY_ERROR_INSTANTIATION: case VERIFY_ERROR_CLASS_CHANGE: + case VERIFY_ERROR_FORCE_INTERPRETER: if (Runtime::Current()->IsAotCompiler() || !can_load_classes_) { // If we're optimistically running verification at compile time, turn NO_xxx, ACCESS_xxx, // class change and instantiation errors into soft verification errors so that we re-verify @@ -857,14 +867,18 @@ bool MethodVerifier::VerifyInstruction(const Instruction* inst, uint32_t code_of case Instruction::kVerifyVarArgNonZero: // Fall-through. 
case Instruction::kVerifyVarArg: { - if (inst->GetVerifyExtraFlags() == Instruction::kVerifyVarArgNonZero && inst->VRegA() <= 0) { - Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid arg count (" << inst->VRegA() << ") in " + // Instructions that can actually return a negative value shouldn't have this flag. + uint32_t v_a = dchecked_integral_cast<uint32_t>(inst->VRegA()); + if ((inst->GetVerifyExtraFlags() == Instruction::kVerifyVarArgNonZero && v_a == 0) || + v_a > Instruction::kMaxVarArgRegs) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid arg count (" << v_a << ") in " "non-range invoke"; return false; } + uint32_t args[Instruction::kMaxVarArgRegs]; inst->GetVarArgs(args); - result = result && CheckVarArgRegs(inst->VRegA(), args); + result = result && CheckVarArgRegs(v_a, args); break; } case Instruction::kVerifyVarArgRangeNonZero: @@ -1175,10 +1189,6 @@ bool MethodVerifier::CheckSwitchTargets(uint32_t cur_offset) { } bool MethodVerifier::CheckVarArgRegs(uint32_t vA, uint32_t arg[]) { - if (vA > Instruction::kMaxVarArgRegs) { - Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid arg count (" << vA << ") in non-range invoke)"; - return false; - } uint16_t registers_size = code_item_->registers_size_; for (uint32_t idx = 0; idx < vA; idx++) { if (arg[idx] >= registers_size) { @@ -1291,13 +1301,22 @@ static bool IsPrimitiveDescriptor(char descriptor) { bool MethodVerifier::SetTypesFromSignature() { RegisterLine* reg_line = reg_table_.GetLine(0); - int arg_start = code_item_->registers_size_ - code_item_->ins_size_; + + // Should have been verified earlier. + DCHECK_GE(code_item_->registers_size_, code_item_->ins_size_); + + uint32_t arg_start = code_item_->registers_size_ - code_item_->ins_size_; size_t expected_args = code_item_->ins_size_; /* long/double count as two */ - DCHECK_GE(arg_start, 0); /* should have been verified earlier */ // Include the "this" pointer. size_t cur_arg = 0; if (!IsStatic()) { + if (expected_args == 0) { + // Expect at least a receiver. + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "expected 0 args, but method is not static"; + return false; + } + // If this is a constructor for a class other than java.lang.Object, mark the first ("this") // argument as uninitialized. This restricts field access until the superclass constructor is // called. @@ -1543,6 +1562,27 @@ bool MethodVerifier::CodeFlowVerifyMethod() { return true; } +// Returns the index of the first final instance field of the given class, or kDexNoIndex if there +// is no such field. +static uint32_t GetFirstFinalInstanceFieldIndex(const DexFile& dex_file, uint16_t type_idx) { + const DexFile::ClassDef* class_def = dex_file.FindClassDef(type_idx); + DCHECK(class_def != nullptr); + const uint8_t* class_data = dex_file.GetClassData(*class_def); + DCHECK(class_data != nullptr); + ClassDataItemIterator it(dex_file, class_data); + // Skip static fields. + while (it.HasNextStaticField()) { + it.Next(); + } + while (it.HasNextInstanceField()) { + if ((it.GetFieldAccessFlags() & kAccFinal) != 0) { + return it.GetMemberIndex(); + } + it.Next(); + } + return DexFile::kDexNoIndex; +} + bool MethodVerifier::CodeFlowVerifyInstruction(uint32_t* start_guess) { // If we're doing FindLocksAtDexPc, check whether we're at the dex pc we care about. 
// We want the state _before_ the instruction, for the case where the dex pc we're @@ -2757,6 +2797,17 @@ bool MethodVerifier::CodeFlowVerifyInstruction(uint32_t* start_guess) { case Instruction::RETURN_VOID_NO_BARRIER: if (IsConstructor() && !IsStatic()) { auto& declaring_class = GetDeclaringClass(); + if (declaring_class.IsUnresolvedReference()) { + // We must iterate over the fields, even if we cannot use mirror classes to do so. Do it + // manually over the underlying dex file. + uint32_t first_index = GetFirstFinalInstanceFieldIndex(*dex_file_, + dex_file_->GetMethodId(dex_method_idx_).class_idx_); + if (first_index != DexFile::kDexNoIndex) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "return-void-no-barrier not expected for field " + << first_index; + } + break; + } auto* klass = declaring_class.GetClass(); for (uint32_t i = 0, num_fields = klass->NumInstanceFields(); i < num_fields; ++i) { if (klass->GetInstanceField(i)->IsFinal()) { @@ -2828,10 +2879,49 @@ bool MethodVerifier::CodeFlowVerifyInstruction(uint32_t* start_guess) { } break; } + case Instruction::INVOKE_LAMBDA: { + // Don't bother verifying, instead the interpreter will take the slow path with access checks. + // If the code would've normally hard-failed, then the interpreter will throw the + // appropriate verification errors at runtime. + Fail(VERIFY_ERROR_FORCE_INTERPRETER); // TODO(iam): implement invoke-lambda verification + break; + } + case Instruction::CREATE_LAMBDA: { + // Don't bother verifying, instead the interpreter will take the slow path with access checks. + // If the code would've normally hard-failed, then the interpreter will throw the + // appropriate verification errors at runtime. + Fail(VERIFY_ERROR_FORCE_INTERPRETER); // TODO(iam): implement create-lambda verification + break; + } + + case Instruction::UNUSED_F4: + case Instruction::UNUSED_F5: + case Instruction::UNUSED_F7: { + DCHECK(false); // TODO(iam): Implement opcodes for lambdas + // Conservatively fail verification on release builds. + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_); + break; + } + + case Instruction::BOX_LAMBDA: { + // Don't bother verifying, instead the interpreter will take the slow path with access checks. + // If the code would've normally hard-failed, then the interpreter will throw the + // appropriate verification errors at runtime. + Fail(VERIFY_ERROR_FORCE_INTERPRETER); // TODO(iam): implement box-lambda verification + break; + } + + case Instruction::UNBOX_LAMBDA: { + // Don't bother verifying, instead the interpreter will take the slow path with access checks. + // If the code would've normally hard-failed, then the interpreter will throw the + // appropriate verification errors at runtime. + Fail(VERIFY_ERROR_FORCE_INTERPRETER); // TODO(iam): implement unbox-lambda verification + break; + } /* These should never appear during verification. */ case Instruction::UNUSED_3E ... Instruction::UNUSED_43: - case Instruction::UNUSED_F3 ... Instruction::UNUSED_FF: + case Instruction::UNUSED_FA ... Instruction::UNUSED_FF: case Instruction::UNUSED_79: case Instruction::UNUSED_7A: Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_); @@ -3790,8 +3880,9 @@ void MethodVerifier::VerifyAPut(const Instruction* inst, } else { const RegType& array_type = work_line_->GetRegisterType(this, inst->VRegB_23x()); if (array_type.IsZero()) { - // Null array type; this code path will fail at runtime. Infer a merge-able type from the - // instruction type. 
+ // Null array type; this code path will fail at runtime. + // Still check that the given value matches the instruction's type. + work_line_->VerifyRegisterType(this, inst->VRegA_23x(), insn_type); } else if (!array_type.IsArrayTypes()) { Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "not array type " << array_type << " with aput"; } else { diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h index 873b8ab094..824daf6fd4 100644 --- a/runtime/verifier/method_verifier.h +++ b/runtime/verifier/method_verifier.h @@ -77,6 +77,16 @@ enum VerifyError { VERIFY_ERROR_ACCESS_METHOD, // IllegalAccessError. VERIFY_ERROR_CLASS_CHANGE, // IncompatibleClassChangeError. VERIFY_ERROR_INSTANTIATION, // InstantiationError. + // For opcodes that don't have complete verifier support (such as lambda opcodes), + // we need a way to continue execution at runtime without attempting to re-verify + // (since we know it will fail no matter what). Instead, run as the interpreter + // in a special "do access checks" mode which will perform verifier-like checking + // on the fly. + // + // TODO: Once all new opcodes have implemented full verifier support, this can be removed. + VERIFY_ERROR_FORCE_INTERPRETER, // Skip the verification phase at runtime; + // force the interpreter to do access checks. + // (sets a soft fail at compile time). }; std::ostream& operator<<(std::ostream& os, const VerifyError& rhs); diff --git a/test/044-proxy/src/ReturnsAndArgPassing.java b/test/044-proxy/src/ReturnsAndArgPassing.java index a1734100bc..225cc5b232 100644 --- a/test/044-proxy/src/ReturnsAndArgPassing.java +++ b/test/044-proxy/src/ReturnsAndArgPassing.java @@ -57,6 +57,8 @@ public class ReturnsAndArgPassing { check(proxy instanceof Proxy); check(method.getDeclaringClass() == MyInterface.class); String name = method.getName(); + // Check for moving GC bugs in proxy stubs. + Runtime.getRuntime().gc(); if (name.endsWith("Foo")) { check(args == null); fooInvocations++; diff --git a/test/098-ddmc/src/Main.java b/test/098-ddmc/src/Main.java index 4914ba2289..f41ff2a94a 100644 --- a/test/098-ddmc/src/Main.java +++ b/test/098-ddmc/src/Main.java @@ -43,24 +43,14 @@ public class Main { System.out.println("Confirm when we overflow, we don't roll over to zero. b/17392248"); final int overflowAllocations = 64 * 1024; // Won't fit in unsigned 16-bit value. - // TODO: Temporary fix. Keep the new objects live so they are not garbage collected. - // This will cause OOM exception for GC stress tests. The root cause is changed behaviour of - // getRecentAllocations(). Working on restoring its old behaviour. b/20037135 - Object[] objects = new Object[overflowAllocations]; for (int i = 0; i < overflowAllocations; i++) { - objects[i] = new Object(); + new Object(); } Allocations after = new Allocations(DdmVmInternal.getRecentAllocations()); System.out.println("before < overflowAllocations=" + (before.numberOfEntries < overflowAllocations)); System.out.println("after > before=" + (after.numberOfEntries > before.numberOfEntries)); System.out.println("after.numberOfEntries=" + after.numberOfEntries); - // TODO: Temporary fix as above. 
b/20037135 - objects = null; - Runtime.getRuntime().gc(); - final int fillerStrings = 16 * 1024; - String[] strings = new String[fillerStrings]; - System.out.println("Disable and confirm back to empty"); DdmVmInternal.enableRecentAllocations(false); System.out.println("status=" + DdmVmInternal.getRecentAllocationStatus()); @@ -76,8 +66,8 @@ public class Main { System.out.println("Confirm we can reenable twice in a row without losing allocations"); DdmVmInternal.enableRecentAllocations(true); System.out.println("status=" + DdmVmInternal.getRecentAllocationStatus()); - for (int i = 0; i < fillerStrings; i++) { - strings[i] = new String("fnord"); + for (int i = 0; i < 16 * 1024; i++) { + new String("fnord"); } Allocations first = new Allocations(DdmVmInternal.getRecentAllocations()); DdmVmInternal.enableRecentAllocations(true); diff --git a/test/137-cfi/run b/test/137-cfi/run index 78cf2aaf8d..ecbbbc7c48 100755 --- a/test/137-cfi/run +++ b/test/137-cfi/run @@ -14,8 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Temporarily disable address space layout randomization (ASLR). -# This is need on host so that the linker loads core.oat at fixed address. -export LD_USE_LOAD_BIAS=1 - exec ${RUN} "$@" diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java index 3c3b939aeb..aa4dda17b6 100644 --- a/test/458-checker-instruction-simplification/src/Main.java +++ b/test/458-checker-instruction-simplification/src/Main.java @@ -927,6 +927,36 @@ public class Main { return (false == arg) ? 3 : 5; } + /// CHECK-START: boolean Main.EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before) + /// CHECK-DAG: <<Arg:z\d+>> ParameterValue + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 + /// CHECK-DAG: <<BoolNot:z\d+>> BooleanNot [<<Arg>>] + /// CHECK-DAG: <<Cond:z\d+>> Equal [<<BoolNot>>,<<Const2>>] + /// CHECK-DAG: Return [<<Cond>>] + + /// CHECK-START: boolean Main.EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after) + /// CHECK-DAG: <<False:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<False>>] + + public static boolean EqualBoolVsIntConst(boolean arg) { + return (arg ? 0 : 1) == 2; + } + + /// CHECK-START: boolean Main.NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before) + /// CHECK-DAG: <<Arg:z\d+>> ParameterValue + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 + /// CHECK-DAG: <<BoolNot:z\d+>> BooleanNot [<<Arg>>] + /// CHECK-DAG: <<Cond:z\d+>> NotEqual [<<BoolNot>>,<<Const2>>] + /// CHECK-DAG: Return [<<Cond>>] + + /// CHECK-START: boolean Main.NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after) + /// CHECK-DAG: <<True:i\d+>> IntConstant 1 + /// CHECK-DAG: Return [<<True>>] + + public static boolean NotEqualBoolVsIntConst(boolean arg) { + return (arg ? 0 : 1) != 2; + } + /* * Test simplification of double Boolean negation. 
Note that sometimes * both negations can be removed but we only expect the simplifier to diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali index da27bf6cf1..ab4afdb547 100644 --- a/test/485-checker-dce-loop-update/smali/TestCase.smali +++ b/test/485-checker-dce-loop-update/smali/TestCase.smali @@ -141,7 +141,7 @@ ## CHECK-DAG: If [<<ArgY>>] loop:<<HeaderY>> ## CHECK-DAG: If [<<ArgZ>>] loop:<<HeaderY>> ## CHECK-DAG: <<Mul9:i\d+>> Mul [<<PhiX1>>,<<Cst9>>] loop:<<HeaderY>> -## CHECK-DAG: <<PhiX2:i\d+>> Phi [<<Mul9>>,<<PhiX1>>] loop:<<HeaderY>> +## CHECK-DAG: <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>] loop:<<HeaderY>> ## CHECK-DAG: If [<<Cst1>>] loop:<<HeaderY>> ## CHECK-DAG: <<Add5>> Add [<<PhiX2>>,<<Cst5>>] loop:<<HeaderY>> ## CHECK-DAG: <<Add7>> Add [<<PhiX1>>,<<Cst7>>] loop:<<HeaderY>> @@ -158,7 +158,7 @@ ## CHECK-DAG: <<Add7>> Add [<<PhiX1>>,<<Cst7>>] loop:<<HeaderY>> ## CHECK-DAG: If [<<ArgZ>>] loop:none ## CHECK-DAG: <<Mul9:i\d+>> Mul [<<PhiX1>>,<<Cst9>>] loop:none -## CHECK-DAG: <<PhiX2:i\d+>> Phi [<<Mul9>>,<<PhiX1>>] loop:none +## CHECK-DAG: <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>] loop:none ## CHECK-DAG: Return [<<PhiX2>>] loop:none .method public static testExitPredecessors(IZZ)I diff --git a/test/496-checker-inlining-and-class-loader/src/Main.java b/test/496-checker-inlining-and-class-loader/src/Main.java index f6d0b41a58..4f23eec4a0 100644 --- a/test/496-checker-inlining-and-class-loader/src/Main.java +++ b/test/496-checker-inlining-and-class-loader/src/Main.java @@ -106,7 +106,7 @@ class LoadedByMyClassLoader { } } -class Main { +public class Main { public static void main(String[] args) throws Exception { MyClassLoader o = new MyClassLoader(); Class foo = o.loadClass("LoadedByMyClassLoader"); diff --git a/test/506-verify-aput/expected.txt b/test/506-verify-aput/expected.txt new file mode 100644 index 0000000000..ccaf6f8f0f --- /dev/null +++ b/test/506-verify-aput/expected.txt @@ -0,0 +1 @@ +Enter diff --git a/test/506-verify-aput/info.txt b/test/506-verify-aput/info.txt new file mode 100644 index 0000000000..461d9d37ad --- /dev/null +++ b/test/506-verify-aput/info.txt @@ -0,0 +1,2 @@ +Test that an aput on a null array is properly checked +by the verifier. diff --git a/test/506-verify-aput/smali/VerifyAPut1.smali b/test/506-verify-aput/smali/VerifyAPut1.smali new file mode 100644 index 0000000000..d50636f4f2 --- /dev/null +++ b/test/506-verify-aput/smali/VerifyAPut1.smali @@ -0,0 +1,26 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +.class public LVerifyAPut1; + +.super Ljava/lang/Object; + +.method public static method()V + .registers 3 + const/4 v0, 0 + const/4 v1, 1 + const/4 v2, 2 + aput-object v2, v0, v1 + return-void +.end method diff --git a/test/506-verify-aput/smali/VerifyAPut2.smali b/test/506-verify-aput/smali/VerifyAPut2.smali new file mode 100644 index 0000000000..2eceebb95d --- /dev/null +++ b/test/506-verify-aput/smali/VerifyAPut2.smali @@ -0,0 +1,25 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LVerifyAPut2; + +.super Ljava/lang/Object; + +.method public static method(LMain;)V + .registers 3 + const/4 v0, 0 + const/4 v1, 1 + aput p0, v0, v1 + return-void +.end method diff --git a/test/506-verify-aput/src/Main.java b/test/506-verify-aput/src/Main.java new file mode 100644 index 0000000000..8359f2c452 --- /dev/null +++ b/test/506-verify-aput/src/Main.java @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Method; + +public class Main { + public static void main(String[] args) throws Exception { + // Workaround for b/18051191. + System.out.println("Enter"); + try { + Class.forName("VerifyAPut1"); + throw new Error("expected verification error"); + } catch (VerifyError e) { /* ignore */ } + + try { + Class.forName("VerifyAPut2"); + throw new Error("expected verification error"); + } catch (VerifyError e) { /* ignore */ } + } +} diff --git a/test/507-boolean-test/expected.txt b/test/507-boolean-test/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/507-boolean-test/expected.txt diff --git a/test/507-boolean-test/info.txt b/test/507-boolean-test/info.txt new file mode 100644 index 0000000000..15c20c1ec0 --- /dev/null +++ b/test/507-boolean-test/info.txt @@ -0,0 +1,2 @@ +Regression test for the optimizing compiler that used to +crash when compiling (a ? 1 : 0) == 2. diff --git a/test/507-boolean-test/src/Main.java b/test/507-boolean-test/src/Main.java new file mode 100644 index 0000000000..f3ce92a644 --- /dev/null +++ b/test/507-boolean-test/src/Main.java @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + public static void main(String[] args) { + if (bar(true)) { + throw new Error("Expected false, got true"); + } + } + + public static boolean bar(boolean a) { + return (a ? 0 : 1) == 2; + } +} diff --git a/test/507-referrer/expected.txt b/test/507-referrer/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/507-referrer/expected.txt diff --git a/test/507-referrer/info.txt b/test/507-referrer/info.txt new file mode 100644 index 0000000000..1335994d07 --- /dev/null +++ b/test/507-referrer/info.txt @@ -0,0 +1,2 @@ +Regression test for the optimizing compiler, which used +to do incorrect access checks on static fields when inlining. diff --git a/test/507-referrer/src/Main.java b/test/507-referrer/src/Main.java new file mode 100644 index 0000000000..6393f39965 --- /dev/null +++ b/test/507-referrer/src/Main.java @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import p1.InPackage; + +public class Main { + public static void main(String[] args) { + int result = InPackage.$inline$foo(); + if (result != 42) { + throw new Error("Expected 42, got " + result); + } + } +} diff --git a/test/507-referrer/src/p1/InPackage.java b/test/507-referrer/src/p1/InPackage.java new file mode 100644 index 0000000000..162f055121 --- /dev/null +++ b/test/507-referrer/src/p1/InPackage.java @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package p1; + +public class InPackage { + public static int $inline$foo() { + return myField; + } + + protected static int myField = 42; +} diff --git a/test/508-checker-disassembly/expected.txt b/test/508-checker-disassembly/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/508-checker-disassembly/expected.txt diff --git a/test/508-checker-disassembly/info.txt b/test/508-checker-disassembly/info.txt new file mode 100644 index 0000000000..4a25b21d39 --- /dev/null +++ b/test/508-checker-disassembly/info.txt @@ -0,0 +1 @@ +Check that inlining disassembly in the .cfg works correctly. diff --git a/test/508-checker-disassembly/src/Main.java b/test/508-checker-disassembly/src/Main.java new file mode 100644 index 0000000000..29c9374aed --- /dev/null +++ b/test/508-checker-disassembly/src/Main.java @@ -0,0 +1,29 @@ +/* +* Copyright (C) 2015 The Android Open Source Project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +public class Main { + // A very simple check that disassembly information has been added to the + // graph. We check that sections have been added for the frame entry and a + // slow path. + /// CHECK-START: int Main.DisassembledFunction(int) disassembly (after) + /// CHECK: FrameEntry + /// CHECK: DivZeroCheckSlowPath{{.*}} + public int DisassembledFunction(int arg) { + return 7 / arg; + } + + public static void main(String[] args) {} +} diff --git a/test/508-referrer-method/expected.txt b/test/508-referrer-method/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/508-referrer-method/expected.txt diff --git a/test/508-referrer-method/info.txt b/test/508-referrer-method/info.txt new file mode 100644 index 0000000000..5f533e7347 --- /dev/null +++ b/test/508-referrer-method/info.txt @@ -0,0 +1,2 @@ +Regression test for the optimizing compiler, which used +to do incorrect access checks on static calls when inlining. diff --git a/test/508-referrer-method/src/Main.java b/test/508-referrer-method/src/Main.java new file mode 100644 index 0000000000..07b25e411c --- /dev/null +++ b/test/508-referrer-method/src/Main.java @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Main needs to be a subclass that has access to the +// protected p1.PackagePrivateA.method. 
+public class Main extends p1.PublicC { + public static void main(String[] args) { + int result = p1.InPackage.$inline$method(); + if (result != 42) { + throw new Error("Expected 42, got " + result); + } + } +} diff --git a/test/508-referrer-method/src/p1/InPackage.java b/test/508-referrer-method/src/p1/InPackage.java new file mode 100644 index 0000000000..84c7d2585d --- /dev/null +++ b/test/508-referrer-method/src/p1/InPackage.java @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package p1; + +public class InPackage { + public static int $inline$method() { + // Call must be through a public subclass of the holder + // of the protected method, so that the Main class also has + // access to it. + return PublicB.method(); + } +} diff --git a/test/508-referrer-method/src/p1/PackagePrivateA.java b/test/508-referrer-method/src/p1/PackagePrivateA.java new file mode 100644 index 0000000000..af8cfe8794 --- /dev/null +++ b/test/508-referrer-method/src/p1/PackagePrivateA.java @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package p1; + +class PackagePrivateA { + protected static int method() { + return 42; + } +} diff --git a/test/508-referrer-method/src/p1/PublicB.java b/test/508-referrer-method/src/p1/PublicB.java new file mode 100644 index 0000000000..58e930488d --- /dev/null +++ b/test/508-referrer-method/src/p1/PublicB.java @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package p1; + +public class PublicB extends PackagePrivateA { +} diff --git a/test/508-referrer-method/src/p1/PublicC.java b/test/508-referrer-method/src/p1/PublicC.java new file mode 100644 index 0000000000..d68e93b6b6 --- /dev/null +++ b/test/508-referrer-method/src/p1/PublicC.java @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package p1; + +public class PublicC extends PackagePrivateA { +} diff --git a/test/509-pre-header/expected.txt b/test/509-pre-header/expected.txt new file mode 100644 index 0000000000..ccaf6f8f0f --- /dev/null +++ b/test/509-pre-header/expected.txt @@ -0,0 +1 @@ +Enter diff --git a/test/509-pre-header/info.txt b/test/509-pre-header/info.txt new file mode 100644 index 0000000000..e9d8b9411e --- /dev/null +++ b/test/509-pre-header/info.txt @@ -0,0 +1,3 @@ +Regression test for the SimplifyCFG phase of optimizing. +The invariant that the pre header of a loop header is the +first predecessor was not preserved. diff --git a/test/509-pre-header/smali/PreHeader.smali b/test/509-pre-header/smali/PreHeader.smali new file mode 100644 index 0000000000..04f4e4950c --- /dev/null +++ b/test/509-pre-header/smali/PreHeader.smali @@ -0,0 +1,39 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LPreHeader; + +.super Ljava/lang/Object; + +# Label names in this method are taken from the original apk +# that exposed the crash. The crash was due to fixing a critical +# edge and not preserving the invariant that the pre header of a loop +# is the first predecessor of the loop header. +.method public static method()V + .registers 2 + const/4 v0, 0 + const/4 v1, 0 + goto :b31 + :b23 + if-eqz v0, :b25 + goto :b23 + :b25 + return-void + :b31 + if-eqz v0, :b23 + if-eqz v1, :bexit + goto :b31 + :bexit + return-void +.end method diff --git a/test/509-pre-header/src/Main.java b/test/509-pre-header/src/Main.java new file mode 100644 index 0000000000..1eca41978c --- /dev/null +++ b/test/509-pre-header/src/Main.java @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Method; + +public class Main { + public static void main(String[] args) throws Exception { + // Workaround for b/18051191. + System.out.println("Enter"); + Class<?> c = Class.forName("PreHeader"); + Method m = c.getMethod("method"); + Object[] arguments = { }; + m.invoke(null, arguments); + } +} diff --git a/test/511-clinit-interface/expected.txt b/test/511-clinit-interface/expected.txt new file mode 100644 index 0000000000..ccaf6f8f0f --- /dev/null +++ b/test/511-clinit-interface/expected.txt @@ -0,0 +1 @@ +Enter diff --git a/test/511-clinit-interface/info.txt b/test/511-clinit-interface/info.txt new file mode 100644 index 0000000000..1351b2928b --- /dev/null +++ b/test/511-clinit-interface/info.txt @@ -0,0 +1,2 @@ +Test that compilers don't crash when having to compile +an interface method. diff --git a/test/511-clinit-interface/smali/BogusInterface.smali b/test/511-clinit-interface/smali/BogusInterface.smali new file mode 100644 index 0000000000..619df24afe --- /dev/null +++ b/test/511-clinit-interface/smali/BogusInterface.smali @@ -0,0 +1,23 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public abstract interface LBogusInterface; + +.super Ljava/lang/Object; + +.method public static <clinit>()V + .registers 2 + invoke-static {}, LMain;->method()V + return-void +.end method diff --git a/test/511-clinit-interface/src/Main.java b/test/511-clinit-interface/src/Main.java new file mode 100644 index 0000000000..c4d0c6625e --- /dev/null +++ b/test/511-clinit-interface/src/Main.java @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Method; + +public class Main { + public static void main(String[] args) throws Exception { + // Workaround for b/18051191. 
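      // Note: Class.forName below loads and initializes BogusInterface, whose
      // <clinit> (written in smali) calls back into Main.method(). The test only
      // checks that the compilers can compile an interface method, here the class
      // initializer, without crashing.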
+ System.out.println("Enter"); + Class<?> c = Class.forName("BogusInterface"); + } + + public static void method() { + } +} diff --git a/test/513-array-deopt/expected.txt b/test/513-array-deopt/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/513-array-deopt/expected.txt diff --git a/test/513-array-deopt/info.txt b/test/513-array-deopt/info.txt new file mode 100644 index 0000000000..afce9d9942 --- /dev/null +++ b/test/513-array-deopt/info.txt @@ -0,0 +1,2 @@ +Regression test for the BCE phase of optimizing, +that used to have wrong assumptions about array length bounds. diff --git a/test/513-array-deopt/src/Main.java b/test/513-array-deopt/src/Main.java new file mode 100644 index 0000000000..a0ae4c3556 --- /dev/null +++ b/test/513-array-deopt/src/Main.java @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + public static int[] bar(int[] a) { + a[0] = 0; + a[1] = 0; + a[2] = 0; + // Up to this point, we record that the lower bound is 2. + // The next instruction will record that the lower bound is 5. + // The deoptimization code used to assume the lower bound has + // to be the one it will add for the deoptimization check (here, it + // would be 2). + return new int[a.length - 5]; + } + + public static void main(String[] args) { + int[] a = new int[5]; + a = bar(a); + if (a.length != 0) { + throw new Error("Expected 0, got " + a.length); + } + } +} diff --git a/test/514-shifts/expected.txt b/test/514-shifts/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/514-shifts/expected.txt diff --git a/test/514-shifts/info.txt b/test/514-shifts/info.txt new file mode 100644 index 0000000000..eb93c5f15b --- /dev/null +++ b/test/514-shifts/info.txt @@ -0,0 +1,2 @@ +Regression test for optimizing that used to miscompile +shifts on ARM. diff --git a/test/514-shifts/src/Main.java b/test/514-shifts/src/Main.java new file mode 100644 index 0000000000..6c44eaba26 --- /dev/null +++ b/test/514-shifts/src/Main.java @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +class Main { + public static void main(String[] args) { + testIntShiftRight(); + testIntShiftLeft(); + testIntUnsignedShiftRight(); + testLongShiftRight(); + testLongShiftLeft(); + testLongUnsignedShiftRight(); + } + + public static void testIntShiftLeft() { + int a = myField; + int b = myOtherField << a; + if (b != -2147483648) { + throw new Error("Expected -2147483648, got " + b); + } + if (a != 0xFFF) { + throw new Error("Expected 0xFFF, got " + a); + } + } + + public static void testIntShiftRight() { + int a = myField; + int b = myOtherField >> a; + if (b != 0) { + throw new Error("Expected 0, got " + b); + } + if (a != 0xFFF) { + throw new Error("Expected 0xFFF, got " + a); + } + } + + public static void testIntUnsignedShiftRight() { + int a = myField; + int b = myOtherField >>> a; + if (b != 0) { + throw new Error("Expected 0, got " + b); + } + if (a != 0xFFF) { + throw new Error("Expected 0xFFF, got " + a); + } + } + + public static void testLongShiftLeft() { + long a = myLongField; + long b = myOtherLongField << a; + if (b != 0x2468ACF13579BDEL) { + throw new Error("Expected 0x2468ACF13579BDEL, got " + b); + } + // The int conversion will be GVN'ed with the one required + // by Java specification of long shift left. + if ((int)a != 0x41) { + throw new Error("Expected 0x41, got " + a); + } + } + + public static void testLongShiftRight() { + long a = myLongField; + long b = myOtherLongField >> a; + if (b != 0x91A2B3C4D5E6F7L) { + throw new Error("Expected 0x91A2B3C4D5E6F7L, got " + b); + } + // The int conversion will be GVN'ed with the one required + // by Java specification of long shift right. + if ((int)a != 0x41) { + throw new Error("Expected 0x41, got " + a); + } + } + + public static void testLongUnsignedShiftRight() { + long a = myLongField; + long b = myOtherLongField >>> a; + if (b != 0x91A2B3C4D5E6F7L) { + throw new Error("Expected 0x91A2B3C4D5E6F7L, got " + b); + } + // The int conversion will be GVN'ed with the one required + // by Java specification of long shift right. + if ((int)a != 0x41) { + throw new Error("Expected 0x41, got " + a); + } + } + + static int myField = 0xFFF; + static int myOtherField = 0x1; + + // Use a value that will need to be masked before doing the shift. + // The maximum shift is 0x3F. + static long myLongField = 0x41; + static long myOtherLongField = 0x123456789abcdefL; +} diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt index 85656374c5..c762603e57 100644 --- a/test/800-smali/expected.txt +++ b/test/800-smali/expected.txt @@ -17,4 +17,8 @@ MoveExceptionOnEntry EmptySparseSwitch b/20224106 b/17410612 +b/21863767 +b/21873167 +b/21614284 +b/21902684 Done! diff --git a/test/800-smali/smali/b_21614284.smali b/test/800-smali/smali/b_21614284.smali new file mode 100644 index 0000000000..3cb1bd0ce2 --- /dev/null +++ b/test/800-smali/smali/b_21614284.smali @@ -0,0 +1,22 @@ +.class public LB21614284; +.super Ljava/lang/Object; + +.field private a:I + +.method public constructor <init>()V + .registers 2 + invoke-direct {p0}, Ljava/lang/Object;-><init>()V + const v0, 42 + iput v0, p0, LB21614284;->a:I + return-void +.end method + +.method public static test(LB21614284;)I + .registers 2 + # Empty if, testing p0. + if-nez p0, :label + :label + # p0 still needs a null check. 
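    # Note (clarification): the if-nez above branches to :label, which is also the
    # fall-through target, so reaching this point proves nothing about p0 being
    # non-null. The compiler must keep the implicit null check on the iget below;
    # Main.java exercises this by calling test(null) and expecting a
    # NullPointerException.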
+ iget v0, p0, LB21614284;->a:I + return v0 +.end method diff --git a/test/800-smali/smali/b_21863767.smali b/test/800-smali/smali/b_21863767.smali new file mode 100644 index 0000000000..9b33bc382d --- /dev/null +++ b/test/800-smali/smali/b_21863767.smali @@ -0,0 +1,29 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LB21863767; + +.super Ljava/lang/Object; + +.method public static run()V + .registers 2 + return-void + goto :start + :start + # The following is dead code but used to crash the compiler. + const/4 v0, 0 + return-wide v0 + return v0 + return-object v0 +.end method diff --git a/test/800-smali/smali/b_21873167.smali b/test/800-smali/smali/b_21873167.smali new file mode 100644 index 0000000000..c0c09cbbf2 --- /dev/null +++ b/test/800-smali/smali/b_21873167.smali @@ -0,0 +1,18 @@ +.class public LB21873167; +.super Ljava/lang/Object; + +.method public constructor <init>()V + .registers 1 + invoke-direct {p0}, Ljava/lang/Object;-><init>()V + return-void +.end method + +.method public test()V + .registers 1 + :start + monitor-enter p0 + monitor-exit p0 + :end + return-void + .catchall {:start .. :end} :end +.end method diff --git a/test/800-smali/smali/b_21902684.smali b/test/800-smali/smali/b_21902684.smali new file mode 100644 index 0000000000..2d906b61bd --- /dev/null +++ b/test/800-smali/smali/b_21902684.smali @@ -0,0 +1,17 @@ +.class public LB21902684; +.super Ljava/lang/Object; + +.method public constructor <init>()V + .registers 1 + invoke-direct {p0}, Ljava/lang/Object;-><init>()V + return-void +.end method + +.method public test()V + .registers 1 + goto :end + new-instance v0, Ljava/lang/String; + invoke-direct {v0}, Ljava/lang/String;-><init>()V + :end + return-void +.end method diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java index 33df06d87a..7280d45b86 100644 --- a/test/800-smali/src/Main.java +++ b/test/800-smali/src/Main.java @@ -83,6 +83,12 @@ public class Main { 0)); testCases.add(new TestCase("b/17410612", "B17410612", "run", null, new VerifyError(), 0)); + testCases.add(new TestCase("b/21863767", "B21863767", "run", null, null, + null)); + testCases.add(new TestCase("b/21873167", "B21873167", "test", null, null, null)); + testCases.add(new TestCase("b/21614284", "B21614284", "test", new Object[] { null }, + new NullPointerException(), null)); + testCases.add(new TestCase("b/21902684", "B21902684", "test", null, null, null)); } public void runTests() { diff --git a/test/955-lambda-smali/expected.txt b/test/955-lambda-smali/expected.txt new file mode 100644 index 0000000000..0a5b5fd37d --- /dev/null +++ b/test/955-lambda-smali/expected.txt @@ -0,0 +1,8 @@ +SanityCheck +Hello world! (0-args, no closure) +ABCD Hello world! (4-args, no closure) +Caught NPE +(BoxUnbox) Hello boxing world! 
(0-args, no closure) +(BoxUnbox) Caught NPE for unbox-lambda +(BoxUnbox) Caught NPE for box-lambda +(BoxUnbox) Caught ClassCastException for unbox-lambda diff --git a/test/955-lambda-smali/info.txt b/test/955-lambda-smali/info.txt new file mode 100644 index 0000000000..aed5e848c4 --- /dev/null +++ b/test/955-lambda-smali/info.txt @@ -0,0 +1,3 @@ +Smali-based tests for experimental lambda instructions. + +Obviously needs to run under ART. diff --git a/test/955-lambda-smali/run b/test/955-lambda-smali/run new file mode 100755 index 0000000000..2aeca8c8fc --- /dev/null +++ b/test/955-lambda-smali/run @@ -0,0 +1,18 @@ +#!/bin/bash +# +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Ensure that the lambda experimental opcodes are turned on for dalvikvm and dex2oat +${RUN} "$@" --runtime-option -Xexperimental-lambdas -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental-lambdas diff --git a/test/955-lambda-smali/smali/BoxUnbox.smali b/test/955-lambda-smali/smali/BoxUnbox.smali new file mode 100644 index 0000000000..5e6673368c --- /dev/null +++ b/test/955-lambda-smali/smali/BoxUnbox.smali @@ -0,0 +1,118 @@ +# +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +.class public LBoxUnbox; +.super Ljava/lang/Object; + +.method public constructor <init>()V +.registers 1 + invoke-direct {p0}, Ljava/lang/Object;-><init>()V + return-void +.end method + +.method public static run()V +.registers 2 + # Trivial 0-arg hello world + create-lambda v0, LBoxUnbox;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + invoke-static {}, LBoxUnbox;->testFailures()V + invoke-static {}, LBoxUnbox;->testFailures2()V + invoke-static {}, LBoxUnbox;->testFailures3()V + + return-void +.end method + +#TODO: should use a closure type instead of ArtMethod. +.method public static doHelloWorld(Ljava/lang/reflect/ArtMethod;)V + .registers 3 # 1 parameters, 2 locals + + const-string v0, "(BoxUnbox) Hello boxing world! 
(0-args, no closure)" + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V + + return-void +.end method + +# Test exceptions are thrown as expected when used opcodes incorrectly +.method private static testFailures()V + .registers 4 # 0 parameters, 4 locals + + const v0, 0 # v0 = null + const v1, 0 # v1 = null +:start + unbox-lambda v2, v0, Ljava/lang/reflect/ArtMethod; + # attempting to unbox a null lambda will throw NPE +:end + return-void + +:handler + const-string v2, "(BoxUnbox) Caught NPE for unbox-lambda" + sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V + + return-void + + .catch Ljava/lang/NullPointerException; {:start .. :end} :handler +.end method + +# Test exceptions are thrown as expected when used opcodes incorrectly +.method private static testFailures2()V + .registers 4 # 0 parameters, 4 locals + + const v0, 0 # v0 = null + const v1, 0 # v1 = null +:start + box-lambda v2, v0 # attempting to box a null lambda will throw NPE +:end + return-void + + # TODO: refactor testFailures using a goto + +:handler + const-string v2, "(BoxUnbox) Caught NPE for box-lambda" + sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V + + return-void + + .catch Ljava/lang/NullPointerException; {:start .. :end} :handler +.end method + +# Test exceptions are thrown as expected when used opcodes incorrectly +.method private static testFailures3()V + .registers 4 # 0 parameters, 4 locals + + const-string v0, "This is not a boxed lambda" +:start + # TODO: use \FunctionalType; here instead + unbox-lambda v2, v0, Ljava/lang/reflect/ArtMethod; + # can't use a string, expects a lambda object here. throws ClassCastException. +:end + return-void + + # TODO: refactor testFailures using a goto + +:handler + const-string v2, "(BoxUnbox) Caught ClassCastException for unbox-lambda" + sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V + + return-void + + .catch Ljava/lang/ClassCastException; {:start .. :end} :handler +.end method diff --git a/test/955-lambda-smali/smali/Main.smali b/test/955-lambda-smali/smali/Main.smali new file mode 100644 index 0000000000..92afd79ada --- /dev/null +++ b/test/955-lambda-smali/smali/Main.smali @@ -0,0 +1,30 @@ +# +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +.class public LMain; + +.super Ljava/lang/Object; + +.method public static main([Ljava/lang/String;)V + .registers 2 + + invoke-static {}, LSanityCheck;->run()I + invoke-static {}, LTrivialHelloWorld;->run()V + invoke-static {}, LBoxUnbox;->run()V + +# TODO: add tests when verification fails + + return-void +.end method diff --git a/test/955-lambda-smali/smali/SanityCheck.smali b/test/955-lambda-smali/smali/SanityCheck.smali new file mode 100644 index 0000000000..4c807d7df9 --- /dev/null +++ b/test/955-lambda-smali/smali/SanityCheck.smali @@ -0,0 +1,36 @@ +# +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +.class public LSanityCheck; +.super Ljava/lang/Object; + + +.method public constructor <init>()V +.registers 1 + invoke-direct {p0}, Ljava/lang/Object;-><init>()V + return-void +.end method + +# This test is just here to make sure that we can at least execute basic non-lambda +# functionality such as printing (when lambdas are enabled in the runtime). +.method public static run()I +# Don't use too many registers here to avoid hitting the Stack::SanityCheck frame<2KB assert +.registers 3 + const-string v0, "SanityCheck" + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V + const v2, 123456 + return v2 +.end method diff --git a/test/955-lambda-smali/smali/TrivialHelloWorld.smali b/test/955-lambda-smali/smali/TrivialHelloWorld.smali new file mode 100644 index 0000000000..38ee95ac7e --- /dev/null +++ b/test/955-lambda-smali/smali/TrivialHelloWorld.smali @@ -0,0 +1,94 @@ +# +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +.class public LTrivialHelloWorld; +.super Ljava/lang/Object; + +.method public constructor <init>()V +.registers 1 + invoke-direct {p0}, Ljava/lang/Object;-><init>()V + return-void +.end method + +.method public static run()V +.registers 8 + # Trivial 0-arg hello world + create-lambda v0, LTrivialHelloWorld;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Slightly more interesting 4-arg hello world + create-lambda v2, doHelloWorldArgs(Ljava/lang/reflect/ArtMethod;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V + # TODO: create-lambda should not write to both v2 and v3 + const-string v4, "A" + const-string v5, "B" + const-string v6, "C" + const-string v7, "D" + invoke-lambda v2, {v4, v5, v6, v7} + + invoke-static {}, LTrivialHelloWorld;->testFailures()V + + return-void +.end method + +#TODO: should use a closure type instead of ArtMethod. +.method public static doHelloWorld(Ljava/lang/reflect/ArtMethod;)V + .registers 3 # 1 parameters, 2 locals + + const-string v0, "Hello world! (0-args, no closure)" + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V + + return-void +.end method + +#TODO: should use a closure type instead of ArtMethod. +.method public static doHelloWorldArgs(Ljava/lang/reflect/ArtMethod;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V + .registers 7 # 5 parameters, 2 locals + + const-string v0, " Hello world! (4-args, no closure)" + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + + invoke-virtual {v1, p1}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + invoke-virtual {v1, p2}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + invoke-virtual {v1, p3}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + invoke-virtual {v1, p4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V + + return-void +.end method + +# Test exceptions are thrown as expected when used opcodes incorrectly +.method private static testFailures()V + .registers 4 # 0 parameters, 4 locals + + const v0, 0 # v0 = null + const v1, 0 # v1 = null +:start + invoke-lambda v0, {} # invoking a null lambda shall raise an NPE +:end + return-void + +:handler + const-string v2, "Caught NPE" + sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V + + return-void + + .catch Ljava/lang/NullPointerException; {:start .. :end} :handler +.end method diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index c2380cc668..ac9656b78c 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -229,10 +229,19 @@ endif TEST_ART_BROKEN_NO_RELOCATE_TESTS := -# 098-ddmc is broken until we restore the old behavior of getRecentAllocation() of DDMS. b/20037135 -ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), 098-ddmc, $(ALL_ADDRESS_SIZES)) +# Tests that are broken with GC stress. +# 098-ddmc is broken until the allocation tracker does not mark recently allocated objects as roots. +# Marking them roots is for consistent behavior with DDMS's getRecentAllocations(). 
b/20037135 +TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \ + 098-ddmc + +ifneq (,$(filter gcstress,$(GC_TYPES))) + ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ + $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),gcstress,$(JNI_TYPES), \ + $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_GCSTRESS_RUN_TESTS), $(ALL_ADDRESS_SIZES)) +endif + +TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := # 115-native-bridge setup is complicated. Need to implement it correctly for the target. ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES),$(COMPILER_TYPES), \ @@ -398,6 +407,37 @@ endif TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS := +# Known broken tests for the MIPS64 optimizing compiler backend in 64-bit mode. b/21555893 +TEST_ART_BROKEN_OPTIMIZING_MIPS64_64BIT_RUN_TESTS := \ + 004-SignalTest \ + 018-stack-overflow \ + 107-int-math2 \ + 449-checker-bce + +ifeq ($(TARGET_ARCH),mips64) + ifneq (,$(filter optimizing,$(COMPILER_TYPES))) + ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \ + optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ + $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_MIPS64_64BIT_RUN_TESTS),64) + endif +endif + +TEST_ART_BROKEN_OPTIMIZING_MIPS64_64BIT_RUN_TESTS := + +# Known broken tests for the MIPS64 optimizing compiler backend in 32-bit mode. b/21555893 +TEST_ART_BROKEN_OPTIMIZING_MIPS64_32BIT_RUN_TESTS := \ + 496-checker-inlining-and-class-loader + +ifeq ($(TARGET_ARCH),mips64) + ifneq (,$(filter optimizing,$(COMPILER_TYPES))) + ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \ + optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ + $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_MIPS64_32BIT_RUN_TESTS),32) + endif +endif + +TEST_ART_BROKEN_OPTIMIZING_MIPS64_32BIT_RUN_TESTS := + # Known broken tests for the optimizing compiler. TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS := diff --git a/test/etc/default-build b/test/etc/default-build index fbe97f999b..92954a98cc 100755 --- a/test/etc/default-build +++ b/test/etc/default-build @@ -18,6 +18,7 @@ set -e DX_FLAGS="" +SKIP_DX_MERGER="false" while true; do if [ "x$1" = "x--dx-option" ]; then @@ -38,22 +39,36 @@ if [ -e classes.dex ]; then exit 0 fi -mkdir classes -${JAVAC} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'` +if [ -d src ]; then + mkdir classes + ${JAVAC} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'` +fi if [ -d src2 ]; then + mkdir -p classes ${JAVAC} -d classes `find src2 -name '*.java'` fi -if [ ${NEED_DEX} = "true" ]; then +if ! [ -d src ] && ! [ -d src2 ]; then + # No src directory? Then forget about trying to run dx. + SKIP_DX_MERGER="true" +fi + +if [ ${NEED_DEX} = "true" -a ${SKIP_DX_MERGER} = "false" ]; then ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \ --dump-width=1000 ${DX_FLAGS} classes fi if [ -d smali ]; then # Compile Smali classes - ${SMALI} -JXmx256m --output smali_classes.dex `find smali -name '*.smali'` - ${DXMERGER} classes.dex classes.dex smali_classes.dex + ${SMALI} -JXmx256m --experimental --api-level 23 --output smali_classes.dex `find smali -name '*.smali'` + + # Don't bother with dexmerger if we provide our own main function in a smali file. 
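  # In other words: when a test has no src/ or src2/ directory, dx never runs and
  # there is no classes.dex to merge into, so the smali output is simply renamed to
  # classes.dex instead of being passed through dexmerger.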
+ if [ ${SKIP_DX_MERGER} = "false" ]; then + ${DXMERGER} classes.dex classes.dex smali_classes.dex + else + mv smali_classes.dex classes.dex + fi fi if [ -d src-ex ]; then diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar index 09841bfcec..cf6be83d6e 100755 --- a/test/etc/run-test-jar +++ b/test/etc/run-test-jar @@ -409,6 +409,10 @@ else export DYLD_LIBRARY_PATH="${ANDROID_ROOT}/lib" export PATH="$PATH:${ANDROID_ROOT}/bin" + # Temporarily disable address space layout randomization (ASLR). + # This is needed on the host so that the linker loads core.oat at the necessary address. + export LD_USE_LOAD_BIAS=1 + cmdline="$dalvikvm_cmdline" if [ "$TIME_OUT" = "y" ]; then |