35 files changed, 879 insertions(+), 281 deletions(-)
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index 85216b7610..bc51ed6e6a 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -22,6 +22,7 @@ #include "base/stringpiece.h" #include "base/time_utils.h" #include "base/timing_logger.h" +#include "base/unix_file/fd_file.h" #include "compiler_callbacks.h" #include "dex/pass_manager.h" #include "dex/quick_compiler_callbacks.h" @@ -42,11 +43,12 @@ JitCompiler* JitCompiler::Create() { return new JitCompiler(); } -extern "C" void* jit_load(CompilerCallbacks** callbacks) { +extern "C" void* jit_load(CompilerCallbacks** callbacks, bool* generate_debug_info) { VLOG(jit) << "loading jit compiler"; auto* const jit_compiler = JitCompiler::Create(); CHECK(jit_compiler != nullptr); *callbacks = jit_compiler->GetCompilerCallbacks(); + *generate_debug_info = jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo(); VLOG(jit) << "Done loading jit compiler"; return jit_compiler; } @@ -160,9 +162,28 @@ JitCompiler::JitCompiler() : total_time_(0) { // Disable dedupe so we can remove compiled methods. compiler_driver_->SetDedupeEnabled(false); compiler_driver_->SetSupportBootImageFixup(false); + + if (compiler_options_->GetGenerateDebugInfo()) { +#ifdef __ANDROID__ + const char* prefix = GetAndroidData(); +#else + const char* prefix = "/tmp"; +#endif + DCHECK_EQ(compiler_driver_->GetThreadCount(), 1u) + << "Generating debug info only works with one compiler thread"; + std::string perf_filename = std::string(prefix) + "/perf-" + std::to_string(getpid()) + ".map"; + perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str())); + if (perf_file_ == nullptr) { + LOG(FATAL) << "Could not create perf file at " << perf_filename; + } + } } JitCompiler::~JitCompiler() { + if (perf_file_ != nullptr) { + UNUSED(perf_file_->Flush()); + UNUSED(perf_file_->Close()); + } } bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { @@ -188,6 +209,20 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*)); JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache(); success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile); + if (success && compiler_options_->GetGenerateDebugInfo()) { + const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode(); + std::ostringstream stream; + stream << std::hex + << reinterpret_cast<uintptr_t>(ptr) + << " " + << code_cache->GetMemorySizeOfCodePointer(ptr) + << " " + << PrettyMethod(method_to_compile) + << std::endl; + std::string str = stream.str(); + bool res = perf_file_->WriteFully(str.c_str(), str.size()); + CHECK(res); + } } // Trim maps to reduce memory usage. 
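The map written above is the plain-text format Linux perf reads from /tmp/perf-<pid>.map to symbolize JIT code: one "<start hex> <size hex> <name>" line per method. A minimal stand-alone sketch of a writer in that format (not ART code; the struct, addresses and method names are illustrative):

```cpp
// Minimal sketch of a perf map writer (not ART code; values illustrative).
#include <cstdint>
#include <cstdio>
#include <unistd.h>

struct JitEntry {  // hypothetical record for one compiled method
  uint64_t code_start;
  uint64_t code_size;
  const char* pretty_name;
};

int main() {
  char path[64];
  std::snprintf(path, sizeof(path), "/tmp/perf-%d.map", static_cast<int>(getpid()));
  std::FILE* f = std::fopen(path, "w");
  if (f == nullptr) return 1;
  const JitEntry entries[] = {
    {0x7f0000001000, 0x80, "void Foo.bar()"},
    {0x7f0000002000, 0x140, "int Foo.baz(int)"},
  };
  for (const JitEntry& e : entries) {
    // Same "<hex addr> <hex size> <pretty method>" shape as the
    // ostringstream code in JitCompiler::CompileMethod above.
    std::fprintf(f, "%llx %llx %s\n",
                 static_cast<unsigned long long>(e.code_start),
                 static_cast<unsigned long long>(e.code_size),
                 e.pretty_name);
  }
  return std::fclose(f) == 0 ? 0 : 1;
}
```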
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h index 913a6d00ae..037a18ac7a 100644 --- a/compiler/jit/jit_compiler.h +++ b/compiler/jit/jit_compiler.h @@ -43,6 +43,9 @@ class JitCompiler { size_t GetTotalCompileTime() const { return total_time_; } + CompilerOptions* GetCompilerOptions() const { + return compiler_options_.get(); + } private: uint64_t total_time_; @@ -53,6 +56,7 @@ class JitCompiler { std::unique_ptr<CompilerCallbacks> callbacks_; std::unique_ptr<CompilerDriver> compiler_driver_; std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; + std::unique_ptr<File> perf_file_; JitCompiler(); diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 0156187765..322912976e 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1539,8 +1539,10 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { bool use_imm = rhs_location.IsConstant(); Register rhs_reg = use_imm ? ZERO : rhs_location.AsRegister<Register>(); int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0; - uint32_t shift_mask = (type == Primitive::kPrimInt) ? kMaxIntShiftValue : kMaxLongShiftValue; - uint32_t shift_value = rhs_imm & shift_mask; + const uint32_t shift_mask = (type == Primitive::kPrimInt) + ? kMaxIntShiftValue + : kMaxLongShiftValue; + const uint32_t shift_value = rhs_imm & shift_mask; // Are the INS (Insert Bit Field) and ROTR instructions supported? bool has_ins_rotr = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); @@ -1580,6 +1582,11 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Rotrv(dst, lhs, rhs_reg); } else { __ Subu(TMP, ZERO, rhs_reg); + // 32-bit shift instructions use the 5 least significant bits of the shift count, so + // shifting by `-rhs_reg` is equivalent to shifting by `(32 - rhs_reg) & 31`. The case + // when `rhs_reg & 31 == 0` is OK even though we don't shift `lhs` left all the way out + // by 32, because the result in this case is computed as `(lhs >> 0) | (lhs << 0)`, + // IOW, the OR'd values are equal. __ Sllv(TMP, lhs, TMP); __ Srlv(dst, lhs, rhs_reg); __ Or(dst, dst, TMP); @@ -1643,33 +1650,33 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { } } } else { - shift_value -= kMipsBitsPerWord; + const uint32_t shift_value_high = shift_value - kMipsBitsPerWord; if (instr->IsShl()) { - __ Sll(dst_high, lhs_low, shift_value); + __ Sll(dst_high, lhs_low, shift_value_high); __ Move(dst_low, ZERO); } else if (instr->IsShr()) { - __ Sra(dst_low, lhs_high, shift_value); + __ Sra(dst_low, lhs_high, shift_value_high); __ Sra(dst_high, dst_low, kMipsBitsPerWord - 1); } else if (instr->IsUShr()) { - __ Srl(dst_low, lhs_high, shift_value); + __ Srl(dst_low, lhs_high, shift_value_high); __ Move(dst_high, ZERO); } else { - if (shift_value == 0) { + if (shift_value == kMipsBitsPerWord) { // 64-bit rotation by 32 is just a swap. 
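The `Subu/Sllv/Srlv/Or` sequence commented above is the classic rotate-right expansion. A quick host-side sketch (plain C++, not the generated code) of why shifting left by `(32 - n) & 31` works, including the `n & 31 == 0` corner case the new comment calls out:

```cpp
#include <cassert>
#include <cstdint>

// Rotate right using only shifts and OR, mirroring the Subu/Sllv/Srlv/Or
// sequence. 32-bit shift instructions (and the C++ shifts here, after
// masking) use only the low 5 bits of the count, so the left-shift amount
// (32 - n) & 31 equals -n & 31.
uint32_t RotateRight(uint32_t lhs, uint32_t n) {
  uint32_t right = lhs >> (n & 31);
  uint32_t left = lhs << (-n & 31);  // (32 - n) & 31
  // When n & 31 == 0 both halves are lhs >> 0 and lhs << 0, so OR'ing the
  // two equal values still yields the correct result.
  return right | left;
}

int main() {
  assert(RotateRight(0x80000001u, 1) == 0xC0000000u);
  assert(RotateRight(0x12345678u, 0) == 0x12345678u);
  assert(RotateRight(0x12345678u, 32) == 0x12345678u);
  return 0;
}
```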
__ Move(dst_low, lhs_high); __ Move(dst_high, lhs_low); } else { if (has_ins_rotr) { - __ Srl(dst_low, lhs_high, shift_value); - __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value, shift_value); - __ Srl(dst_high, lhs_low, shift_value); - __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value, shift_value); + __ Srl(dst_low, lhs_high, shift_value_high); + __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value_high, shift_value_high); + __ Srl(dst_high, lhs_low, shift_value_high); + __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value_high, shift_value_high); } else { - __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value); - __ Srl(dst_low, lhs_high, shift_value); + __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value_high); + __ Srl(dst_low, lhs_high, shift_value_high); __ Or(dst_low, dst_low, TMP); - __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value); - __ Srl(dst_high, lhs_low, shift_value); + __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value_high); + __ Srl(dst_high, lhs_low, shift_value_high); __ Or(dst_high, dst_high, TMP); } } @@ -4797,6 +4804,7 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) { Primitive::Type input_type = conversion->GetInputType(); Primitive::Type result_type = conversion->GetResultType(); DCHECK_NE(input_type, result_type); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) || (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) { @@ -4804,8 +4812,9 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) { } LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || - (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) { + if (!isR6 && + ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || + (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) { call_kind = LocationSummary::kCall; } @@ -4843,6 +4852,8 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); DCHECK_NE(input_type, result_type); @@ -4888,7 +4899,37 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi << " to " << result_type; } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) { - if (input_type != Primitive::kPrimLong) { + if (input_type == Primitive::kPrimLong) { + if (isR6) { + // cvt.s.l/cvt.d.l requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary + // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction. 
+ Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register src_low = locations->InAt(0).AsRegisterPairLow<Register>(); + FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + __ Mtc1(src_low, FTMP); + __ Mthc1(src_high, FTMP); + if (result_type == Primitive::kPrimFloat) { + __ Cvtsl(dst, FTMP); + } else { + __ Cvtdl(dst, FTMP); + } + } else { + int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f) + : QUICK_ENTRY_POINT(pL2d); + bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f) + : IsDirectEntrypoint(kQuickL2d); + codegen_->InvokeRuntime(entry_offset, + conversion, + conversion->GetDexPc(), + nullptr, + direct); + if (result_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + } else { + CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + } + } + } else { Register src = locations->InAt(0).AsRegister<Register>(); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); __ Mtc1(src, FTMP); @@ -4897,54 +4938,168 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi } else { __ Cvtdw(dst, FTMP); } - } else { - int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f) - : QUICK_ENTRY_POINT(pL2d); - bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f) - : IsDirectEntrypoint(kQuickL2d); - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr, - direct); - if (result_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); - } else { - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); - } } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); - int32_t entry_offset; - bool direct; - if (result_type != Primitive::kPrimLong) { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz) - : QUICK_ENTRY_POINT(pD2iz); - direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2iz) - : IsDirectEntrypoint(kQuickD2iz); + if (result_type == Primitive::kPrimLong) { + if (isR6) { + // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary + // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction. + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); + Register dst_low = locations->Out().AsRegisterPairLow<Register>(); + MipsLabel truncate; + MipsLabel done; + + // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive + // value when the input is either a NaN or is outside of the range of the output type + // after the truncation. IOW, the three special cases (NaN, too small, too big) produce + // the same result. + // + // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum + // value of the output type if the input is outside of the range after the truncation or + // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct + // results. This matches the desired float/double-to-int/long conversion exactly. + // + // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction. 
+ // + // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate + // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, + // even though it must be NAN2008=1 on R6. + // + // The code takes care of the different behaviors by first comparing the input to the + // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int). + // If the input is greater than or equal to the minimum, it proceeds to the truncate + // instruction, which will handle such an input the same way irrespective of NAN2008. + // Otherwise the input is compared to itself to determine whether it is a NaN or not + // in order to return either zero or the minimum value. + // + // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the + // truncate instruction for MIPS64R6. + if (input_type == Primitive::kPrimFloat) { + uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); + __ CmpLeS(FTMP, FTMP, src); + } else { + uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min()); + __ LoadConst32(TMP, High32Bits(min_val)); + __ Mtc1(ZERO, FTMP); + __ Mthc1(TMP, FTMP); + __ CmpLeD(FTMP, FTMP, src); + } + + __ Bc1nez(FTMP, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CmpEqS(FTMP, src, src); + } else { + __ CmpEqD(FTMP, src, src); + } + __ Move(dst_low, ZERO); + __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min()); + __ Mfc1(TMP, FTMP); + __ And(dst_high, dst_high, TMP); + + __ B(&done); + + __ Bind(&truncate); + + if (input_type == Primitive::kPrimFloat) { + __ TruncLS(FTMP, src); + } else { + __ TruncLD(FTMP, src); + } + __ Mfc1(dst_low, FTMP); + __ Mfhc1(dst_high, FTMP); + + __ Bind(&done); + } else { + int32_t entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) + : QUICK_ENTRY_POINT(pD2l); + bool direct = (input_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l) + : IsDirectEntrypoint(kQuickD2l); + codegen_->InvokeRuntime(entry_offset, conversion, conversion->GetDexPc(), nullptr, direct); + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + } + } } else { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) - : QUICK_ENTRY_POINT(pD2l); - direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l) - : IsDirectEntrypoint(kQuickD2l); - } - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr, - direct); - if (result_type != Primitive::kPrimLong) { + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + Register dst = locations->Out().AsRegister<Register>(); + MipsLabel truncate; + MipsLabel done; + + // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate + // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, + // even though it must be NAN2008=1 on R6. + // + // For details see the large comment above for the truncation of float/double to long on R6. + // + // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the + // truncate instruction for MIPS64R6.
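For reference, the net semantics all of these compare-then-truncate sequences implement is the Java float/double-to-integral rule. A small host-side sketch, assuming only standard C++ (this is the contract, not the emitted code); the NaN/minimum split comes for free in the generated code because the R6 CMP writes an all-ones or all-zeros mask, so ANDing it with the minimum constant yields either the minimum or zero:

```cpp
#include <cmath>
#include <cstdint>
#include <limits>

// Java semantics for (long) f: NaN -> 0, out-of-range values clamp to the
// type's min/max, everything else truncates toward zero.
int64_t FloatToLong(float f) {
  if (std::isnan(f)) {
    return 0;
  }
  if (f >= static_cast<float>(std::numeric_limits<int64_t>::max())) {
    return std::numeric_limits<int64_t>::max();
  }
  if (f <= static_cast<float>(std::numeric_limits<int64_t>::min())) {
    // The generated code tests "input >= minimum" first; anything below the
    // minimum that is not a NaN produces the minimum.
    return std::numeric_limits<int64_t>::min();
  }
  return static_cast<int64_t>(f);  // in range: truncate toward zero
}
```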
if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2iz, int32_t, float>(); + uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); } else { - CheckEntrypointTypes<kQuickD2iz, int32_t, double>(); + uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, High32Bits(min_val)); + __ Mtc1(ZERO, FTMP); + if (fpu_32bit) { + __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1)); + } else { + __ Mthc1(TMP, FTMP); + } } - } else { + + if (isR6) { + if (input_type == Primitive::kPrimFloat) { + __ CmpLeS(FTMP, FTMP, src); + } else { + __ CmpLeD(FTMP, FTMP, src); + } + __ Bc1nez(FTMP, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CmpEqS(FTMP, src, src); + } else { + __ CmpEqD(FTMP, src, src); + } + __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); + __ Mfc1(TMP, FTMP); + __ And(dst, dst, TMP); + } else { + if (input_type == Primitive::kPrimFloat) { + __ ColeS(0, FTMP, src); + } else { + __ ColeD(0, FTMP, src); + } + __ Bc1t(0, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CeqS(0, src, src); + } else { + __ CeqD(0, src, src); + } + __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); + __ Movf(dst, ZERO, 0); + } + + __ B(&done); + + __ Bind(&truncate); + if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + __ TruncWS(FTMP, src); } else { - CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + __ TruncWD(FTMP, src); } + __ Mfc1(dst, FTMP); + + __ Bind(&done); } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index abfaae4b50..38c32cad06 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -3932,36 +3932,18 @@ void LocationsBuilderMIPS64::VisitTypeConversion(HTypeConversion* conversion) { LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; } - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || - (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) { - call_kind = LocationSummary::kCall; - } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion); - if (call_kind == LocationSummary::kNoCall) { - if (Primitive::IsFloatingPointType(input_type)) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(0, Location::RequiresRegister()); - } - - if (Primitive::IsFloatingPointType(result_type)) { - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } + if (Primitive::IsFloatingPointType(input_type)) { + locations->SetInAt(0, Location::RequiresFpuRegister()); } else { - InvokeRuntimeCallingConvention calling_convention; - - if (Primitive::IsFloatingPointType(input_type)) { - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - } else { - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - } + locations->SetInAt(0, 
Location::RequiresRegister()); + } - locations->SetOut(calling_convention.GetReturnLocation(result_type)); + if (Primitive::IsFloatingPointType(result_type)) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } else { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } } @@ -4006,55 +3988,107 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver << " to " << result_type; } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) { - if (input_type != Primitive::kPrimLong) { - FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); - GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); - __ Mtc1(src, FTMP); + FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); + GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); + if (input_type == Primitive::kPrimLong) { + __ Dmtc1(src, FTMP); if (result_type == Primitive::kPrimFloat) { - __ Cvtsw(dst, FTMP); + __ Cvtsl(dst, FTMP); } else { - __ Cvtdw(dst, FTMP); + __ Cvtdl(dst, FTMP); } } else { - int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f) - : QUICK_ENTRY_POINT(pL2d); - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr); + __ Mtc1(src, FTMP); if (result_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + __ Cvtsw(dst, FTMP); } else { - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + __ Cvtdw(dst, FTMP); } } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); - int32_t entry_offset; - if (result_type != Primitive::kPrimLong) { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz) - : QUICK_ENTRY_POINT(pD2iz); + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>(); + Mips64Label truncate; + Mips64Label done; + + // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive + // value when the input is either a NaN or is outside of the range of the output type + // after the truncation. IOW, the three special cases (NaN, too small, too big) produce + // the same result. + // + // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum + // value of the output type if the input is outside of the range after the truncation or + // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct + // results. This matches the desired float/double-to-int/long conversion exactly. + // + // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction. + // + // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate + // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, + // even though it must be NAN2008=1 on R6. + // + // The code takes care of the different behaviors by first comparing the input to the + // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int). + // If the input is greater than or equal to the minimum, it proceeds to the truncate + // instruction, which will handle such an input the same way irrespective of NAN2008.
+ // Otherwise the input is compared to itself to determine whether it is a NaN or not + // in order to return either zero or the minimum value. + // + // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the + // truncate instruction for MIPS64R6. + if (input_type == Primitive::kPrimFloat) { + uint32_t min_val = (result_type == Primitive::kPrimLong) + ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min()) + : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); + __ CmpLeS(FTMP, FTMP, src); } else { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) - : QUICK_ENTRY_POINT(pD2l); + uint64_t min_val = (result_type == Primitive::kPrimLong) + ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min()) + : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); + __ LoadConst64(TMP, min_val); + __ Dmtc1(TMP, FTMP); + __ CmpLeD(FTMP, FTMP, src); } - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr); - if (result_type != Primitive::kPrimLong) { + + __ Bc1nez(FTMP, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CmpEqS(FTMP, src, src); + } else { + __ CmpEqD(FTMP, src, src); + } + if (result_type == Primitive::kPrimLong) { + __ LoadConst64(dst, std::numeric_limits<int64_t>::min()); + } else { + __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); + } + __ Mfc1(TMP, FTMP); + __ And(dst, dst, TMP); + + __ Bc(&done); + + __ Bind(&truncate); + + if (result_type == Primitive::kPrimLong) { if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2iz, int32_t, float>(); + __ TruncLS(FTMP, src); } else { - CheckEntrypointTypes<kQuickD2iz, int32_t, double>(); + __ TruncLD(FTMP, src); } + __ Dmfc1(dst, FTMP); } else { if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + __ TruncWS(FTMP, src); } else { - CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + __ TruncWD(FTMP, src); } + __ Mfc1(dst, FTMP); } + + __ Bind(&done); } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index c60a4eacaa..4784de1380 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -270,7 +270,7 @@ void StackMapStream::FillIn(MemoryRegion region) { stack_map.SetStackMask(stack_map_encoding_, *entry.sp_mask); } - if (entry.num_dex_registers == 0) { + if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) { // No dex map available. 
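Restated as a predicate, the stack_map_stream.cc change that follows below omits the dex register map both when an entry declares no registers and when it declares some but none is live. A tiny sketch, with std::bitset standing in for ART's BitVector:

```cpp
#include <bitset>
#include <cstddef>

// Mirrors the new condition in StackMapStream::FillIn: emit a dex register
// map only if registers are declared AND at least one live bit is set. The
// new stack_map_test entry (number_of_dex_registers = 1, empty live mask)
// exercises exactly the second branch.
template <size_t kNumRegs>
bool NeedsDexRegisterMap(size_t num_dex_registers,
                         const std::bitset<kNumRegs>& live_mask) {
  return num_dex_registers != 0 && live_mask.count() != 0;
}
```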
stack_map.SetDexRegisterMapOffset(stack_map_encoding_, StackMap::kNoDexRegisterMap); } else { diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 560502fde6..604787fd92 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -614,6 +614,10 @@ TEST(StackMapTest, TestNoDexRegisterMap) { stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.EndStackMapEntry(); + number_of_dex_registers = 1; + stream.BeginStackMapEntry(1, 67, 0x4, &sp_mask, number_of_dex_registers, 0); + stream.EndStackMapEntry(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); @@ -622,7 +626,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) { CodeInfo code_info(region); StackMapEncoding encoding = code_info.ExtractEncoding(); ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask()); - ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(0u, number_of_location_catalog_entries); @@ -638,6 +642,16 @@ TEST(StackMapTest, TestNoDexRegisterMap) { ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding)); ASSERT_FALSE(stack_map.HasInlineInfo(encoding)); + + stack_map = code_info.GetStackMapAt(1, encoding); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(67, encoding))); + ASSERT_EQ(1u, stack_map.GetDexPc(encoding)); + ASSERT_EQ(67u, stack_map.GetNativePcOffset(encoding)); + ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding)); + + ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding)); + ASSERT_FALSE(stack_map.HasInlineInfo(encoding)); } TEST(StackMapTest, InlineTest) { diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 8c462436a7..ac9c097892 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -1035,6 +1035,22 @@ void MipsAssembler::Movt(Register rd, Register rs, int cc) { EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01); } +void MipsAssembler::TruncLS(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09); +} + +void MipsAssembler::TruncLD(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09); +} + +void MipsAssembler::TruncWS(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D); +} + +void MipsAssembler::TruncWD(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D); +} + void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) { EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20); } @@ -1051,6 +1067,14 @@ void MipsAssembler::Cvtds(FRegister fd, FRegister fs) { EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21); } +void MipsAssembler::Cvtsl(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20); +} + +void MipsAssembler::Cvtdl(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21); +} + void MipsAssembler::Mfc1(Register rt, FRegister fs) { EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); } diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 9aed3463b7..01c6490f88 100644 --- 
a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -265,10 +265,16 @@ class MipsAssembler FINAL : public Assembler { void Movf(Register rd, Register rs, int cc); // R2 void Movt(Register rd, Register rs, int cc); // R2 + void TruncLS(FRegister fd, FRegister fs); // R2+, FR=1 + void TruncLD(FRegister fd, FRegister fs); // R2+, FR=1 + void TruncWS(FRegister fd, FRegister fs); + void TruncWD(FRegister fd, FRegister fs); void Cvtsw(FRegister fd, FRegister fs); void Cvtdw(FRegister fd, FRegister fs); void Cvtsd(FRegister fd, FRegister fs); void Cvtds(FRegister fd, FRegister fs); + void Cvtsl(FRegister fd, FRegister fs); // R2+, FR=1 + void Cvtdl(FRegister fd, FRegister fs); // R2+, FR=1 void Mfc1(Register rt, FRegister fs); void Mtc1(Register rt, FRegister fs); diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index 4361843c54..5fc3deebd3 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -599,6 +599,14 @@ TEST_F(AssemblerMIPSTest, CvtDW) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdw, "cvt.d.w ${reg1}, ${reg2}"), "CvtDW"); } +TEST_F(AssemblerMIPSTest, CvtSL) { + DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsl, "cvt.s.l ${reg1}, ${reg2}"), "CvtSL"); +} + +TEST_F(AssemblerMIPSTest, CvtDL) { + DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "CvtDL"); +} + TEST_F(AssemblerMIPSTest, CvtSD) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsd, "cvt.s.d ${reg1}, ${reg2}"), "CvtSD"); } @@ -607,6 +615,22 @@ TEST_F(AssemblerMIPSTest, CvtDS) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtds, "cvt.d.s ${reg1}, ${reg2}"), "CvtDS"); } +TEST_F(AssemblerMIPSTest, TruncWS) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "TruncWS"); +} + +TEST_F(AssemblerMIPSTest, TruncWD) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "TruncWD"); +} + +TEST_F(AssemblerMIPSTest, TruncLS) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "TruncLS"); +} + +TEST_F(AssemblerMIPSTest, TruncLD) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "TruncLD"); +} + TEST_F(AssemblerMIPSTest, Mfc1) { DriverStr(RepeatRF(&mips::MipsAssembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1"); } diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index cfd8421e93..f9ff2df8bb 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -771,6 +771,22 @@ void Mips64Assembler::RoundWD(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xc); } +void Mips64Assembler::TruncLS(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x9); +} + +void Mips64Assembler::TruncLD(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x9); +} + +void Mips64Assembler::TruncWS(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xd); +} + +void Mips64Assembler::TruncWD(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xd); +} + void Mips64Assembler::CeilLS(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xa); } diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 883f013f87..3262640ce7 
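All of the new Trunc*/Cvt* emitters above are instances of the same COP1 FR-format word. A stand-alone sketch of that bit layout (field widths per the MIPS architecture manuals; EncodeFR is a hypothetical helper, not the assembler's API):

```cpp
#include <cassert>
#include <cstdint>

// COP1 FR-format word, matching the EmitFR calls above:
//   opcode(6) | fmt(5) | ft(5) | fs(5) | fd(5) | funct(6)
uint32_t EncodeFR(uint32_t opcode, uint32_t fmt, uint32_t ft,
                  uint32_t fs, uint32_t fd, uint32_t funct) {
  return (opcode << 26) | (fmt << 21) | (ft << 16) | (fs << 11) |
         (fd << 6) | funct;
}

int main() {
  // trunc.w.s f4, f2: COP1 (0x11), single fmt (0x10), funct 0x0D.
  assert(EncodeFR(0x11, 0x10, 0, 2, 4, 0x0D) == 0x4600110Du);
  // cvt.d.l f6, f8: long fixed-point fmt (0x15), funct 0x21 (R2+, FR=1).
  assert(EncodeFR(0x11, 0x15, 0, 8, 6, 0x21) == 0x46A041A1u);
  return 0;
}
```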
100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -250,6 +250,10 @@ class Mips64Assembler FINAL : public Assembler { void RoundLD(FpuRegister fd, FpuRegister fs); void RoundWS(FpuRegister fd, FpuRegister fs); void RoundWD(FpuRegister fd, FpuRegister fs); + void TruncLS(FpuRegister fd, FpuRegister fs); + void TruncLD(FpuRegister fd, FpuRegister fs); + void TruncWS(FpuRegister fd, FpuRegister fs); + void TruncWD(FpuRegister fd, FpuRegister fs); void CeilLS(FpuRegister fd, FpuRegister fs); void CeilLD(FpuRegister fd, FpuRegister fs); void CeilWS(FpuRegister fd, FpuRegister fs); diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index bac4375b35..7d79be2731 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -527,6 +527,22 @@ TEST_F(AssemblerMIPS64Test, CvtSW) { DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "cvt.s.w"); } +TEST_F(AssemblerMIPS64Test, TruncWS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "trunc.w.s"); +} + +TEST_F(AssemblerMIPS64Test, TruncWD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "trunc.w.d"); +} + +TEST_F(AssemblerMIPS64Test, TruncLS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "trunc.l.s"); +} + +TEST_F(AssemblerMIPS64Test, TruncLD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "trunc.l.d"); +} + //////////////// // CALL / JMP // //////////////// diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index 0691f2a620..699ab3e65a 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -1312,7 +1312,114 @@ END \name .endm // Generate the allocation entrypoints for each allocator. -GENERATE_ALL_ALLOC_ENTRYPOINTS +GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). +ENTRY art_quick_alloc_object_rosalloc + + # Fast path rosalloc allocation + # a0: type_idx + # a1: ArtMethod* + # s1: Thread::Current + # ----------------------------- + # t0: class + # t1: object size + # t2: rosalloc run + # t3: thread stack top offset + # t4: thread stack bottom offset + # v0: free list head + # + # t5, t6 : temps + + lw $t0, ART_METHOD_DEX_CACHE_TYPES_OFFSET_32($a1) # Load dex cache resolved types + # array. + + sll $t5, $a0, COMPRESSED_REFERENCE_SIZE_SHIFT # Shift the value. + addu $t5, $t0, $t5 # Compute the index. + lw $t0, 0($t5) # Load class (t0). + beqz $t0, .Lart_quick_alloc_object_rosalloc_slow_path + + li $t6, MIRROR_CLASS_STATUS_INITIALIZED + lw $t5, MIRROR_CLASS_STATUS_OFFSET($t0) # Check class status. + bne $t5, $t6, .Lart_quick_alloc_object_rosalloc_slow_path + + # Add a fake dependence from the following access flag and size loads to the status load. This + # is to prevent those loads from being reordered above the status load and reading wrong values. + xor $t5, $t5, $t5 + addu $t0, $t0, $t5 + + lw $t5, MIRROR_CLASS_ACCESS_FLAGS_OFFSET($t0) # Check if access flags has + li $t6, ACCESS_FLAGS_CLASS_IS_FINALIZABLE # kAccClassIsFinalizable. 
+ and $t6, $t5, $t6 + bnez $t6, .Lart_quick_alloc_object_rosalloc_slow_path + + lw $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1) # Check if thread local allocation + lw $t4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1) # stack has any room left. + bgeu $t3, $t4, .Lart_quick_alloc_object_rosalloc_slow_path + + lw $t1, MIRROR_CLASS_OBJECT_SIZE_OFFSET($t0) # Load object size (t1). + li $t5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE # Check if size is for a thread local + # allocation. + bgtu $t1, $t5, .Lart_quick_alloc_object_rosalloc_slow_path + + # Compute the rosalloc bracket index from the size. Align the size up to the rosalloc bracket + # quantum size, divide by the quantum size and subtract 1. + + addiu $t1, $t1, -1 # Decrease obj size and shift right + srl $t1, $t1, ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT # by quantum. + + sll $t2, $t1, POINTER_SIZE_SHIFT + addu $t2, $t2, $s1 + lw $t2, THREAD_ROSALLOC_RUNS_OFFSET($t2) # Load rosalloc run (t2). + + # Load the free list head (v0). + # NOTE: this will be the return val. + + lw $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2) + beqz $v0, .Lart_quick_alloc_object_rosalloc_slow_path + nop + + # Load the next pointer of the head and update the list head with the next pointer. + + lw $t5, ROSALLOC_SLOT_NEXT_OFFSET($v0) + sw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2) + + # Store the class pointer in the header. This also overwrites the first pointer. The offsets are + # asserted to match. + +#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET +#error "Class pointer needs to overwrite next pointer." +#endif + + POISON_HEAP_REF $t0 + sw $t0, MIRROR_OBJECT_CLASS_OFFSET($v0) + + # Push the new object onto the thread local allocation stack and increment the thread local + # allocation stack top. + + sw $v0, 0($t3) + addiu $t3, $t3, COMPRESSED_REFERENCE_SIZE + sw $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1) + + # Decrement the size of the free list. + + lw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2) + addiu $t5, $t5, -1 + sw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2) + + sync # Fence. + + jalr $zero, $ra + nop + + .Lart_quick_alloc_object_rosalloc_slow_path: + + SETUP_REFS_ONLY_CALLEE_SAVE_FRAME + jal artAllocObjectFromCodeRosAlloc + move $a2, $s1 # Pass self as argument. + RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + +END art_quick_alloc_object_rosalloc /* * Entry from managed code to resolve a string, this stub will allocate a String and deliver an diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S index 75e0037145..617f572c0f 100644 --- a/runtime/interpreter/mterp/arm/footer.S +++ b/runtime/interpreter/mterp/arm/footer.S @@ -128,9 +128,11 @@ MterpCheckSuspendAndContinue: */ MterpFallback: EXPORT_PC +#if MTERP_LOGGING mov r0, rSELF add r1, rFP, #OFF_FP_SHADOWFRAME bl MterpLogFallback +#endif MterpCommonFallback: mov r0, #0 @ signal retry with reference interpreter. b MterpDone @@ -144,9 +146,6 @@ MterpCommonFallback: * uint32_t* rFP (should still be live, pointer to base of vregs) */ MterpExceptionReturn: - ldr r2, [rFP, #OFF_FP_RESULT_REGISTER] - str r0, [r2] - str r1, [r2, #4] mov r0, #1 @ signal return to caller.
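The fast path in art_quick_alloc_object_rosalloc above boils down to a bracket-index computation plus a per-thread free-list pop. A compact C++ sketch of those two steps (illustrative types and quantum, not the runtime's):

```cpp
#include <cstddef>
#include <cstdint>

// Illustrative stand-ins for the RosAlloc structures the assembly walks.
struct Slot { Slot* next; };
struct FreeList { Slot* head; uint32_t size; };

constexpr size_t kQuantumShift = 4;  // assumed 16-byte bracket quantum

// "addiu $t1, $t1, -1; srl $t1, $t1, shift": round the size up to a quantum
// multiple, divide by the quantum, and subtract 1, in two instructions.
size_t BracketIndex(size_t obj_size) {
  return (obj_size - 1) >> kQuantumShift;
}

// The lw/sw pairs above, in C++: pop the head slot, advance the head to its
// next pointer, and shrink the list. An empty list takes the slow path.
void* PopSlot(FreeList* run) {
  Slot* slot = run->head;
  if (slot == nullptr) {
    return nullptr;  // beqz $v0, ...slow_path
  }
  run->head = slot->next;
  run->size--;
  // The caller then stores the class pointer over slot->next, which is why
  // ROSALLOC_SLOT_NEXT_OFFSET must equal MIRROR_OBJECT_CLASS_OFFSET.
  return slot;
}
```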
b MterpDone MterpReturn: diff --git a/runtime/interpreter/mterp/arm/op_aget.S b/runtime/interpreter/mterp/arm/op_aget.S index 2cc4d66565..11f7079c3f 100644 --- a/runtime/interpreter/mterp/arm/op_aget.S +++ b/runtime/interpreter/mterp/arm/op_aget.S @@ -1,11 +1,11 @@ -%default { "load":"ldr", "shift":"2", "is_object":"0", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" } +%default { "load":"ldr", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" } /* * Array get, 32 bits or less. vAA <- vBB[vCC]. * * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17 * instructions. We use a pair of FETCH_Bs instead. * - * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short + * for: aget, aget-boolean, aget-byte, aget-char, aget-short * * NOTE: assumes data offset for arrays is the same for all non-wide types. * If this changes, specialize. @@ -25,9 +25,5 @@ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST $load r2, [r0, #$data_offset] @ r2<- vBB[vCC] GET_INST_OPCODE ip @ extract opcode from rINST - .if $is_object - SET_VREG_OBJECT r2, r9 @ vAA<- r2 - .else SET_VREG r2, r9 @ vAA<- r2 - .endif GOTO_OPCODE ip @ jump to next instruction diff --git a/runtime/interpreter/mterp/arm/op_iget_object_quick.S b/runtime/interpreter/mterp/arm/op_iget_object_quick.S index 1f8dc5afb2..fe29106d47 100644 --- a/runtime/interpreter/mterp/arm/op_iget_object_quick.S +++ b/runtime/interpreter/mterp/arm/op_iget_object_quick.S @@ -1 +1,17 @@ -%include "arm/op_iget_quick.S" {"is_object":"1"} + /* For: iget-object-quick */ + /* op vA, vB, offset@CCCC */ + mov r2, rINST, lsr #12 @ r2<- B + FETCH r1, 1 @ r1<- field byte offset + GET_VREG r0, r2 @ r0<- object we're operating on + cmp r0, #0 @ check object for null + beq common_errNullObject @ object was null + bl artIGetObjectFromMterp @ (obj, offset) + ldr r3, [rSELF, #THREAD_EXCEPTION_OFFSET] + ubfx r2, rINST, #8, #4 @ r2<- A + PREFETCH_INST 2 + cmp r3, #0 + bne MterpPossibleException @ bail out + SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 + ADVANCE 2 @ advance rPC + GET_INST_OPCODE ip @ extract opcode from rINST + GOTO_OPCODE ip @ jump to next instruction diff --git a/runtime/interpreter/mterp/arm/op_iget_quick.S b/runtime/interpreter/mterp/arm/op_iget_quick.S index 9229afcd1e..0eaf364f6b 100644 --- a/runtime/interpreter/mterp/arm/op_iget_quick.S +++ b/runtime/interpreter/mterp/arm/op_iget_quick.S @@ -1,5 +1,5 @@ -%default { "load":"ldr", "is_object":"0" } - /* For: iget-quick, iget-object-quick */ +%default { "load":"ldr" } + /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */ /* op vA, vB, offset@CCCC */ mov r2, rINST, lsr #12 @ r2<- B FETCH r1, 1 @ r1<- field byte offset @@ -9,10 +9,6 @@ beq common_errNullObject @ object was null $load r0, [r3, r1] @ r0<- obj.field FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - .if $is_object - SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 - .else SET_VREG r0, r2 @ fp[A]<- r0 - .endif GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc index 060fe76aab..9975458b85 100644 --- a/runtime/interpreter/mterp/mterp.cc +++ b/runtime/interpreter/mterp/mterp.cc @@ -607,5 +607,14 @@ extern "C" mirror::Object* artAGetObjectFromMterp(mirror::Object* arr, int32_t i } } +extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj, uint32_t field_offset) + SHARED_REQUIRES(Locks::mutator_lock_) { + if (UNLIKELY(obj == nullptr)) { + 
ThrowNullPointerExceptionFromInterpreter(); + return nullptr; + } + return obj->GetFieldObject<mirror::Object>(MemberOffset(field_offset)); +} + } // namespace interpreter } // namespace art diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S index 33036e6cd7..2d6f057aa7 100644 --- a/runtime/interpreter/mterp/out/mterp_arm.S +++ b/runtime/interpreter/mterp/out/mterp_arm.S @@ -2013,7 +2013,7 @@ artMterpAsmInstructionStart = .L_op_nop * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17 * instructions. We use a pair of FETCH_Bs instead. * - * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short + * for: aget, aget-boolean, aget-byte, aget-char, aget-short * * NOTE: assumes data offset for arrays is the same for all non-wide types. * If this changes, specialize. @@ -2033,11 +2033,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH_ADVANCE_INST 2 @ advance rPC, load rINST ldr r2, [r0, #MIRROR_INT_ARRAY_DATA_OFFSET] @ r2<- vBB[vCC] GET_INST_OPCODE ip @ extract opcode from rINST - .if 0 - SET_VREG_OBJECT r2, r9 @ vAA<- r2 - .else SET_VREG r2, r9 @ vAA<- r2 - .endif GOTO_OPCODE ip @ jump to next instruction /* ------------------------------ */ @@ -2106,7 +2102,7 @@ artMterpAsmInstructionStart = .L_op_nop * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17 * instructions. We use a pair of FETCH_Bs instead. * - * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short + * for: aget, aget-boolean, aget-byte, aget-char, aget-short * * NOTE: assumes data offset for arrays is the same for all non-wide types. * If this changes, specialize. @@ -2126,11 +2122,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH_ADVANCE_INST 2 @ advance rPC, load rINST ldrb r2, [r0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET] @ r2<- vBB[vCC] GET_INST_OPCODE ip @ extract opcode from rINST - .if 0 - SET_VREG_OBJECT r2, r9 @ vAA<- r2 - .else SET_VREG r2, r9 @ vAA<- r2 - .endif GOTO_OPCODE ip @ jump to next instruction @@ -2145,7 +2137,7 @@ artMterpAsmInstructionStart = .L_op_nop * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17 * instructions. We use a pair of FETCH_Bs instead. * - * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short + * for: aget, aget-boolean, aget-byte, aget-char, aget-short * * NOTE: assumes data offset for arrays is the same for all non-wide types. * If this changes, specialize. @@ -2165,11 +2157,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH_ADVANCE_INST 2 @ advance rPC, load rINST ldrsb r2, [r0, #MIRROR_BYTE_ARRAY_DATA_OFFSET] @ r2<- vBB[vCC] GET_INST_OPCODE ip @ extract opcode from rINST - .if 0 - SET_VREG_OBJECT r2, r9 @ vAA<- r2 - .else SET_VREG r2, r9 @ vAA<- r2 - .endif GOTO_OPCODE ip @ jump to next instruction @@ -2184,7 +2172,7 @@ artMterpAsmInstructionStart = .L_op_nop * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17 * instructions. We use a pair of FETCH_Bs instead. * - * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short + * for: aget, aget-boolean, aget-byte, aget-char, aget-short * * NOTE: assumes data offset for arrays is the same for all non-wide types. * If this changes, specialize. 
@@ -2204,11 +2192,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH_ADVANCE_INST 2 @ advance rPC, load rINST ldrh r2, [r0, #MIRROR_CHAR_ARRAY_DATA_OFFSET] @ r2<- vBB[vCC] GET_INST_OPCODE ip @ extract opcode from rINST - .if 0 - SET_VREG_OBJECT r2, r9 @ vAA<- r2 - .else SET_VREG r2, r9 @ vAA<- r2 - .endif GOTO_OPCODE ip @ jump to next instruction @@ -2223,7 +2207,7 @@ artMterpAsmInstructionStart = .L_op_nop * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17 * instructions. We use a pair of FETCH_Bs instead. * - * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short + * for: aget, aget-boolean, aget-byte, aget-char, aget-short * * NOTE: assumes data offset for arrays is the same for all non-wide types. * If this changes, specialize. @@ -2243,11 +2227,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH_ADVANCE_INST 2 @ advance rPC, load rINST ldrsh r2, [r0, #MIRROR_SHORT_ARRAY_DATA_OFFSET] @ r2<- vBB[vCC] GET_INST_OPCODE ip @ extract opcode from rINST - .if 0 - SET_VREG_OBJECT r2, r9 @ vAA<- r2 - .else SET_VREG r2, r9 @ vAA<- r2 - .endif GOTO_OPCODE ip @ jump to next instruction @@ -7127,7 +7107,7 @@ constvalop_long_to_double: .balign 128 .L_op_iget_quick: /* 0xe3 */ /* File: arm/op_iget_quick.S */ - /* For: iget-quick, iget-object-quick */ + /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */ /* op vA, vB, offset@CCCC */ mov r2, rINST, lsr #12 @ r2<- B FETCH r1, 1 @ r1<- field byte offset @@ -7137,11 +7117,7 @@ constvalop_long_to_double: beq common_errNullObject @ object was null ldr r0, [r3, r1] @ r0<- obj.field FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - .if 0 - SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 - .else SET_VREG r0, r2 @ fp[A]<- r0 - .endif GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -7167,26 +7143,24 @@ constvalop_long_to_double: .balign 128 .L_op_iget_object_quick: /* 0xe5 */ /* File: arm/op_iget_object_quick.S */ -/* File: arm/op_iget_quick.S */ - /* For: iget-quick, iget-object-quick */ + /* For: iget-object-quick */ /* op vA, vB, offset@CCCC */ mov r2, rINST, lsr #12 @ r2<- B FETCH r1, 1 @ r1<- field byte offset - GET_VREG r3, r2 @ r3<- object we're operating on - ubfx r2, rINST, #8, #4 @ r2<- A - cmp r3, #0 @ check object for null + GET_VREG r0, r2 @ r0<- object we're operating on + cmp r0, #0 @ check object for null beq common_errNullObject @ object was null - ldr r0, [r3, r1] @ r0<- obj.field - FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - .if 1 + bl artIGetObjectFromMterp @ (obj, offset) + ldr r3, [rSELF, #THREAD_EXCEPTION_OFFSET] + ubfx r2, rINST, #8, #4 @ r2<- A + PREFETCH_INST 2 + cmp r3, #0 + bne MterpPossibleException @ bail out SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 - .else - SET_VREG r0, r2 @ fp[A]<- r0 - .endif + ADVANCE 2 @ advance rPC GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction - /* ------------------------------ */ .balign 128 .L_op_iput_quick: /* 0xe6 */ @@ -7373,7 +7347,7 @@ constvalop_long_to_double: .L_op_iget_boolean_quick: /* 0xef */ /* File: arm/op_iget_boolean_quick.S */ /* File: arm/op_iget_quick.S */ - /* For: iget-quick, iget-object-quick */ + /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */ /* op vA, vB, offset@CCCC */ mov r2, rINST, lsr #12 @ r2<- B FETCH r1, 1 @ r1<- field byte offset @@ -7383,11 +7357,7 @@ constvalop_long_to_double: beq common_errNullObject @ object was null ldrb r0, [r3, r1] @ r0<- obj.field FETCH_ADVANCE_INST 2 @ 
advance rPC, load rINST - .if 0 - SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 - .else SET_VREG r0, r2 @ fp[A]<- r0 - .endif GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -7397,7 +7367,7 @@ constvalop_long_to_double: .L_op_iget_byte_quick: /* 0xf0 */ /* File: arm/op_iget_byte_quick.S */ /* File: arm/op_iget_quick.S */ - /* For: iget-quick, iget-object-quick */ + /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */ /* op vA, vB, offset@CCCC */ mov r2, rINST, lsr #12 @ r2<- B FETCH r1, 1 @ r1<- field byte offset @@ -7407,11 +7377,7 @@ constvalop_long_to_double: beq common_errNullObject @ object was null ldrsb r0, [r3, r1] @ r0<- obj.field FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - .if 0 - SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 - .else SET_VREG r0, r2 @ fp[A]<- r0 - .endif GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -7421,7 +7387,7 @@ constvalop_long_to_double: .L_op_iget_char_quick: /* 0xf1 */ /* File: arm/op_iget_char_quick.S */ /* File: arm/op_iget_quick.S */ - /* For: iget-quick, iget-object-quick */ + /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */ /* op vA, vB, offset@CCCC */ mov r2, rINST, lsr #12 @ r2<- B FETCH r1, 1 @ r1<- field byte offset @@ -7431,11 +7397,7 @@ constvalop_long_to_double: beq common_errNullObject @ object was null ldrh r0, [r3, r1] @ r0<- obj.field FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - .if 0 - SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 - .else SET_VREG r0, r2 @ fp[A]<- r0 - .endif GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -7445,7 +7407,7 @@ constvalop_long_to_double: .L_op_iget_short_quick: /* 0xf2 */ /* File: arm/op_iget_short_quick.S */ /* File: arm/op_iget_quick.S */ - /* For: iget-quick, iget-object-quick */ + /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */ /* op vA, vB, offset@CCCC */ mov r2, rINST, lsr #12 @ r2<- B FETCH r1, 1 @ r1<- field byte offset @@ -7455,11 +7417,7 @@ constvalop_long_to_double: beq common_errNullObject @ object was null ldrsh r0, [r3, r1] @ r0<- obj.field FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - .if 0 - SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 - .else SET_VREG r0, r2 @ fp[A]<- r0 - .endif GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -12204,9 +12162,11 @@ MterpCheckSuspendAndContinue: */ MterpFallback: EXPORT_PC +#if MTERP_LOGGING mov r0, rSELF add r1, rFP, #OFF_FP_SHADOWFRAME bl MterpLogFallback +#endif MterpCommonFallback: mov r0, #0 @ signal retry with reference interpreter. b MterpDone @@ -12220,9 +12180,6 @@ MterpCommonFallback: * uint32_t* rFP (should still be live, pointer to base of vregs) */ MterpExceptionReturn: - ldr r2, [rFP, #OFF_FP_RESULT_REGISTER] - str r0, [r2] - str r1, [r2, #4] mov r0, #1 @ signal return to caller. 
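Restated in C++, the rewritten op_iget_object_quick handler above performs a null check, a call into the C++ helper, a pending-exception check, and a reference-aware register store. A self-contained sketch of that control flow (all types here are simplified stand-ins for ART's ShadowFrame, Thread and mirror::Object):

```cpp
#include <cstdint>

// Simplified stand-ins for ART's mirror::Object, ShadowFrame and Thread.
struct Object { Object* field; };
struct Frame { Object* regs[16]; };
struct ThreadState { bool pending_exception = false; };

// Control flow of the rewritten handler: GET_VREG + null check, the call to
// artIGetObjectFromMterp, the THREAD_EXCEPTION_OFFSET check, then
// SET_VREG_OBJECT (a reference-aware store, unlike plain SET_VREG).
bool HandleIGetObjectQuick(Frame& frame, uint16_t inst, uint16_t offset,
                           ThreadState* self) {
  uint32_t vB = inst >> 12;         // object register (B)
  uint32_t vA = (inst >> 8) & 0xF;  // destination register (A)
  Object* obj = frame.regs[vB];
  if (obj == nullptr) {             // beq common_errNullObject
    self->pending_exception = true;
    return false;
  }
  (void)offset;                     // the real helper reads at a byte offset
  Object* value = obj->field;       // artIGetObjectFromMterp(obj, offset)
  if (self->pending_exception) {    // bne MterpPossibleException
    return false;
  }
  frame.regs[vA] = value;           // SET_VREG_OBJECT r0, r2
  return true;
}
```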
b MterpDone MterpReturn: diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc index ab70f4c158..05668a97b3 100644 --- a/runtime/jit/jit.cc +++ b/runtime/jit/jit.cc @@ -64,10 +64,14 @@ void Jit::AddTimingLogger(const TimingLogger& logger) { cumulative_timings_.AddLogger(logger); } -Jit::Jit() - : jit_library_handle_(nullptr), jit_compiler_handle_(nullptr), jit_load_(nullptr), - jit_compile_method_(nullptr), dump_info_on_shutdown_(false), - cumulative_timings_("JIT timings"), save_profiling_info_(false) { +Jit::Jit() : jit_library_handle_(nullptr), + jit_compiler_handle_(nullptr), + jit_load_(nullptr), + jit_compile_method_(nullptr), + dump_info_on_shutdown_(false), + cumulative_timings_("JIT timings"), + save_profiling_info_(false), + generate_debug_info_(false) { } Jit* Jit::Create(JitOptions* options, std::string* error_msg) { @@ -77,7 +81,10 @@ Jit* Jit::Create(JitOptions* options, std::string* error_msg) { return nullptr; } jit->code_cache_.reset(JitCodeCache::Create( - options->GetCodeCacheInitialCapacity(), options->GetCodeCacheMaxCapacity(), error_msg)); + options->GetCodeCacheInitialCapacity(), + options->GetCodeCacheMaxCapacity(), + jit->generate_debug_info_, + error_msg)); if (jit->GetCodeCache() == nullptr) { return nullptr; } @@ -99,7 +106,7 @@ bool Jit::LoadCompiler(std::string* error_msg) { *error_msg = oss.str(); return false; } - jit_load_ = reinterpret_cast<void* (*)(CompilerCallbacks**)>( + jit_load_ = reinterpret_cast<void* (*)(CompilerCallbacks**, bool*)>( dlsym(jit_library_handle_, "jit_load")); if (jit_load_ == nullptr) { dlclose(jit_library_handle_); @@ -121,9 +128,10 @@ bool Jit::LoadCompiler(std::string* error_msg) { return false; } CompilerCallbacks* callbacks = nullptr; + bool will_generate_debug_symbols = false; VLOG(jit) << "Calling JitLoad interpreter_only=" << Runtime::Current()->GetInstrumentation()->InterpretOnly(); - jit_compiler_handle_ = (jit_load_)(&callbacks); + jit_compiler_handle_ = (jit_load_)(&callbacks, &will_generate_debug_symbols); if (jit_compiler_handle_ == nullptr) { dlclose(jit_library_handle_); *error_msg = "JIT couldn't load compiler"; @@ -136,6 +144,7 @@ bool Jit::LoadCompiler(std::string* error_msg) { return false; } compiler_callbacks_ = callbacks; + generate_debug_info_ = will_generate_debug_symbols; return true; } diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h index 0edce2fa49..42bbbe73c7 100644 --- a/runtime/jit/jit.h +++ b/runtime/jit/jit.h @@ -86,7 +86,7 @@ class Jit { // JIT compiler void* jit_library_handle_; void* jit_compiler_handle_; - void* (*jit_load_)(CompilerCallbacks**); + void* (*jit_load_)(CompilerCallbacks**, bool*); void (*jit_unload_)(void*); bool (*jit_compile_method_)(void*, ArtMethod*, Thread*); @@ -99,6 +99,7 @@ class Jit { CompilerCallbacks* compiler_callbacks_; // Owned by the jit compiler. bool save_profiling_info_; + bool generate_debug_info_; DISALLOW_COPY_AND_ASSIGN(Jit); }; diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index 1ac57b1d84..bf3bd3c4aa 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -49,8 +49,16 @@ static constexpr int kProtCode = PROT_READ | PROT_EXEC; JitCodeCache* JitCodeCache::Create(size_t initial_capacity, size_t max_capacity, + bool generate_debug_info, std::string* error_msg) { CHECK_GE(max_capacity, initial_capacity); + + // Generating debug information is mostly for using the 'perf' tool, which does + // not work with ashmem. 
+ bool use_ashmem = !generate_debug_info; + // With 'perf', we want a 1-1 mapping between an address and a method. + bool garbage_collect_code = !generate_debug_info; + // We need to have 32 bit offsets from method headers in code cache which point to things // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work. // Ensure we're below 1 GB to be safe. @@ -65,7 +73,7 @@ JitCodeCache* JitCodeCache::Create(size_t initial_capacity, std::string error_str; // Map name specific for android_os_Debug.cpp accounting. MemMap* data_map = MemMap::MapAnonymous( - "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str); + "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str, use_ashmem); if (data_map == nullptr) { std::ostringstream oss; oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity; @@ -84,7 +92,8 @@ JitCodeCache* JitCodeCache::Create(size_t initial_capacity, DCHECK_EQ(code_size + data_size, max_capacity); uint8_t* divider = data_map->Begin() + data_size; - MemMap* code_map = data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str); + MemMap* code_map = + data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str, use_ashmem); if (code_map == nullptr) { std::ostringstream oss; oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity; @@ -95,14 +104,16 @@ JitCodeCache* JitCodeCache::Create(size_t initial_capacity, data_size = initial_capacity / 2; code_size = initial_capacity - data_size; DCHECK_EQ(code_size + data_size, initial_capacity); - return new JitCodeCache(code_map, data_map, code_size, data_size, max_capacity); + return new JitCodeCache( + code_map, data_map, code_size, data_size, garbage_collect_code, max_capacity); } JitCodeCache::JitCodeCache(MemMap* code_map, MemMap* data_map, size_t initial_code_capacity, size_t initial_data_capacity, - size_t max_capacity) + size_t max_capacity, + bool garbage_collect_code) : lock_("Jit code cache", kJitCodeCacheLock), lock_cond_("Jit code cache variable", lock_), collection_in_progress_(false), @@ -113,7 +124,8 @@ JitCodeCache::JitCodeCache(MemMap* code_map, code_end_(initial_code_capacity), data_end_(initial_data_capacity), has_done_one_collection_(false), - last_update_time_ns_(0) { + last_update_time_ns_(0), + garbage_collect_code_(garbage_collect_code) { code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/); data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/); @@ -516,7 +528,11 @@ void JitCodeCache::GarbageCollectCache(Thread* self) { // we hold the lock. 
{ MutexLock mu(self, lock_); - if (has_done_one_collection_ && IncreaseCodeCacheCapacity()) { + if (!garbage_collect_code_) { + IncreaseCodeCacheCapacity(); + NotifyCollectionDone(self); + return; + } else if (has_done_one_collection_ && IncreaseCodeCacheCapacity()) { has_done_one_collection_ = false; NotifyCollectionDone(self); return; @@ -730,5 +746,10 @@ void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSE info->SetIsMethodBeingCompiled(false); } +size_t JitCodeCache::GetMemorySizeOfCodePointer(const void* ptr) { + MutexLock mu(Thread::Current(), lock_); + return mspace_usable_size(reinterpret_cast<const void*>(FromCodeToAllocation(ptr))); +} + } // namespace jit } // namespace art diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h index 1c842e4aa7..a152bcd2d4 100644 --- a/runtime/jit/jit_code_cache.h +++ b/runtime/jit/jit_code_cache.h @@ -53,7 +53,10 @@ class JitCodeCache { // Create the code cache with a code + data capacity equal to "capacity", error message is passed // in the out arg error_msg. - static JitCodeCache* Create(size_t initial_capacity, size_t max_capacity, std::string* error_msg); + static JitCodeCache* Create(size_t initial_capacity, + size_t max_capacity, + bool generate_debug_info, + std::string* error_msg); // Number of bytes allocated in the code cache. size_t CodeCacheSize() REQUIRES(!lock_); @@ -159,13 +162,16 @@ class JitCodeCache { return current_capacity_; } + size_t GetMemorySizeOfCodePointer(const void* ptr) REQUIRES(!lock_); + private: // Take ownership of maps. JitCodeCache(MemMap* code_map, MemMap* data_map, size_t initial_code_capacity, size_t initial_data_capacity, - size_t max_capacity); + size_t max_capacity, + bool garbage_collect_code); // Internal version of 'CommitCode' that will not retry if the // allocation fails. Return null if the allocation fails. @@ -252,6 +258,9 @@ class JitCodeCache { // It is atomic to avoid locking when reading it. Atomic<uint64_t> last_update_time_ns_; + // Whether we can do garbage collection. + const bool garbage_collect_code_; + DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache); }; diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc index e133847b06..3571edb277 100644 --- a/runtime/mem_map.cc +++ b/runtime/mem_map.cc @@ -34,14 +34,11 @@ #include "thread-inl.h" #include "utils.h" -#define USE_ASHMEM 1 - -#ifdef USE_ASHMEM #include <cutils/ashmem.h> + #ifndef ANDROID_OS #include <sys/resource.h> #endif -#endif #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON @@ -282,7 +279,8 @@ MemMap* MemMap::MapAnonymous(const char* name, int prot, bool low_4gb, bool reuse, - std::string* error_msg) { + std::string* error_msg, + bool use_ashmem) { #ifndef __LP64__ UNUSED(low_4gb); #endif @@ -303,17 +301,17 @@ MemMap* MemMap::MapAnonymous(const char* name, ScopedFd fd(-1); -#ifdef USE_ASHMEM -#ifdef __ANDROID__ - const bool use_ashmem = true; -#else - // When not on Android ashmem is faked using files in /tmp. Ensure that such files won't - // fail due to ulimit restrictions. If they will then use a regular mmap. - struct rlimit rlimit_fsize; - CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlimit_fsize), 0); - const bool use_ashmem = (rlimit_fsize.rlim_cur == RLIM_INFINITY) || - (page_aligned_byte_count < rlimit_fsize.rlim_cur); -#endif + if (use_ashmem) { + if (!kIsTargetBuild) { + // When not on Android ashmem is faked using files in /tmp. Ensure that such files won't + // fail due to ulimit restrictions. If they will then use a regular mmap. 
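For reference, the host-only fallback that follows is standard getrlimit(2) usage; a self-contained equivalent, with an illustrative function name:

#include <sys/resource.h>
#include <cstddef>

// True when a file of byte_count bytes can be created under the current
// RLIMIT_FSIZE, i.e. when the /tmp-backed fake-ashmem path is safe to use.
bool FitsFileSizeLimit(size_t byte_count) {
  struct rlimit rlimit_fsize;
  if (getrlimit(RLIMIT_FSIZE, &rlimit_fsize) != 0) {
    return false;  // Be conservative and fall back to a plain mmap.
  }
  return rlimit_fsize.rlim_cur == RLIM_INFINITY ||
         byte_count < rlimit_fsize.rlim_cur;
}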
+ struct rlimit rlimit_fsize; + CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlimit_fsize), 0); + use_ashmem = (rlimit_fsize.rlim_cur == RLIM_INFINITY) || + (page_aligned_byte_count < rlimit_fsize.rlim_cur); + } + } + if (use_ashmem) { // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are // prefixed "dalvik-". @@ -326,7 +324,6 @@ MemMap* MemMap::MapAnonymous(const char* name, } flags &= ~MAP_ANONYMOUS; } -#endif // We need to store and potentially set an error number for pretty printing of errors int saved_errno = 0; @@ -508,7 +505,7 @@ MemMap::MemMap(const std::string& name, uint8_t* begin, size_t size, void* base_ } MemMap* MemMap::RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot, - std::string* error_msg) { + std::string* error_msg, bool use_ashmem) { DCHECK_GE(new_end, Begin()); DCHECK_LE(new_end, End()); DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_); @@ -532,23 +529,22 @@ MemMap* MemMap::RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_pro DCHECK_EQ(tail_base_begin + tail_base_size, old_base_end); DCHECK_ALIGNED(tail_base_size, kPageSize); -#ifdef USE_ASHMEM - // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are - // prefixed "dalvik-". - std::string debug_friendly_name("dalvik-"); - debug_friendly_name += tail_name; - ScopedFd fd(ashmem_create_region(debug_friendly_name.c_str(), tail_base_size)); - int flags = MAP_PRIVATE | MAP_FIXED; - if (fd.get() == -1) { - *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s", - tail_name, strerror(errno)); - return nullptr; - } -#else - ScopedFd fd(-1); + int int_fd = -1; int flags = MAP_PRIVATE | MAP_ANONYMOUS; -#endif - + if (use_ashmem) { + // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are + // prefixed "dalvik-". + std::string debug_friendly_name("dalvik-"); + debug_friendly_name += tail_name; + int_fd = ashmem_create_region(debug_friendly_name.c_str(), tail_base_size); + flags = MAP_PRIVATE | MAP_FIXED; + if (int_fd == -1) { + *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s", + tail_name, strerror(errno)); + return nullptr; + } + } + ScopedFd fd(int_fd); MEMORY_TOOL_MAKE_UNDEFINED(tail_base_begin, tail_base_size); // Unmap/map the tail region. diff --git a/runtime/mem_map.h b/runtime/mem_map.h index efce09ae94..ed213659e3 100644 --- a/runtime/mem_map.h +++ b/runtime/mem_map.h @@ -57,17 +57,18 @@ class MemMap { // "reuse" allows re-mapping an address range from an existing mapping. // // The word "anonymous" in this context means "not backed by a file". The supplied - // 'ashmem_name' will be used -- on systems that support it -- to give the mapping + // 'name' will be used -- on systems that support it -- to give the mapping // a name. // // On success, returns a MemMap instance. On failure, returns null. - static MemMap* MapAnonymous(const char* ashmem_name, + static MemMap* MapAnonymous(const char* name, uint8_t* addr, size_t byte_count, int prot, bool low_4gb, bool reuse, - std::string* error_msg); + std::string* error_msg, + bool use_ashmem = true); // Create placeholder for a region allocated by direct call to mmap.
// This is useful when we do not have control over the code calling mmap, @@ -168,7 +169,8 @@ class MemMap { MemMap* RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot, - std::string* error_msg); + std::string* error_msg, + bool use_ashmem = true); static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map) REQUIRES(!Locks::mem_maps_lock_); diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index 9cb37eed58..786cf06e2d 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -221,18 +221,22 @@ void QuickExceptionHandler::SetCatchEnvironmentForOptimizedHandler(StackVisitor* CodeInfo code_info = handler_method_header_->GetOptimizedCodeInfo(); StackMapEncoding encoding = code_info.ExtractEncoding(); + // Find stack map of the catch block. + StackMap catch_stack_map = code_info.GetCatchStackMapForDexPc(GetHandlerDexPc(), encoding); + DCHECK(catch_stack_map.IsValid()); + DexRegisterMap catch_vreg_map = + code_info.GetDexRegisterMapOf(catch_stack_map, encoding, number_of_vregs); + if (!catch_vreg_map.IsValid()) { + return; + } + // Find stack map of the throwing instruction. StackMap throw_stack_map = code_info.GetStackMapForNativePcOffset(stack_visitor->GetNativePcOffset(), encoding); DCHECK(throw_stack_map.IsValid()); DexRegisterMap throw_vreg_map = code_info.GetDexRegisterMapOf(throw_stack_map, encoding, number_of_vregs); - - // Find stack map of the catch block. - StackMap catch_stack_map = code_info.GetCatchStackMapForDexPc(GetHandlerDexPc(), encoding); - DCHECK(catch_stack_map.IsValid()); - DexRegisterMap catch_vreg_map = - code_info.GetDexRegisterMapOf(catch_stack_map, encoding, number_of_vregs); + DCHECK(throw_vreg_map.IsValid()); // Copy values between them. for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) { @@ -387,6 +391,10 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { number_of_vregs) : code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs); + if (!vreg_map.IsValid()) { + return; + } + for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) { if (updated_vregs != nullptr && updated_vregs[vreg]) { // Keep the value set by debugger. diff --git a/runtime/stack.cc b/runtime/stack.cc index 9098d38bb0..5faff93b97 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -322,6 +322,9 @@ bool StackVisitor::GetVRegFromOptimizedCode(ArtMethod* m, uint16_t vreg, VRegKin number_of_dex_registers) : code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); + if (!dex_register_map.IsValid()) { + return false; + } DexRegisterLocation::Kind location_kind = dex_register_map.GetLocationKind(vreg, number_of_dex_registers, code_info, encoding); switch (location_kind) { diff --git a/runtime/stack_map.h b/runtime/stack_map.h index a15a08180e..84185ce49f 100644 --- a/runtime/stack_map.h +++ b/runtime/stack_map.h @@ -473,6 +473,9 @@ class DexRegisterLocationCatalog { class DexRegisterMap { public: explicit DexRegisterMap(MemoryRegion region) : region_(region) {} + DexRegisterMap() {} + + bool IsValid() const { return region_.pointer() != nullptr; } // Get the surface kind of Dex register `dex_register_number`. 
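The IsValid() addition above replaces a hard DCHECK with a queryable "no dex register map recorded" state; the call sites in quick_exception_handler.cc and stack.cc above then simply bail out. A self-contained sketch of the sentinel pattern, using stand-in types rather than ART's real MemoryRegion/DexRegisterMap:

#include <cstddef>

class RegionSketch {
 public:
  RegionSketch() : pointer_(nullptr), size_(0) {}
  RegionSketch(void* pointer, size_t size) : pointer_(pointer), size_(size) {}
  void* pointer() const { return pointer_; }
 private:
  void* pointer_;
  size_t size_;
};

class MapSketch {
 public:
  MapSketch() {}  // Default construction wraps a null region.
  explicit MapSketch(RegionSketch region) : region_(region) {}
  // Valid only when backed by a real region, i.e. when the stack map
  // actually recorded dex register locations.
  bool IsValid() const { return region_.pointer() != nullptr; }
 private:
  RegionSketch region_;
};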
DexRegisterLocation::Kind GetLocationKind(uint16_t dex_register_number, @@ -1136,11 +1139,14 @@ class CodeInfo { DexRegisterMap GetDexRegisterMapOf(StackMap stack_map, const StackMapEncoding& encoding, uint32_t number_of_dex_registers) const { - DCHECK(stack_map.HasDexRegisterMap(encoding)); - uint32_t offset = GetDexRegisterMapsOffset(encoding) - + stack_map.GetDexRegisterMapOffset(encoding); - size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers); - return DexRegisterMap(region_.Subregion(offset, size)); + if (!stack_map.HasDexRegisterMap(encoding)) { + return DexRegisterMap(); + } else { + uint32_t offset = GetDexRegisterMapsOffset(encoding) + + stack_map.GetDexRegisterMapOffset(encoding); + size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers); + return DexRegisterMap(region_.Subregion(offset, size)); + } } // Return the `DexRegisterMap` pointed by `inline_info` at depth `depth`. @@ -1148,11 +1154,14 @@ class CodeInfo { InlineInfo inline_info, const StackMapEncoding& encoding, uint32_t number_of_dex_registers) const { - DCHECK(inline_info.HasDexRegisterMapAtDepth(depth)); - uint32_t offset = GetDexRegisterMapsOffset(encoding) - + inline_info.GetDexRegisterMapOffsetAtDepth(depth); - size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers); - return DexRegisterMap(region_.Subregion(offset, size)); + if (!inline_info.HasDexRegisterMapAtDepth(depth)) { + return DexRegisterMap(); + } else { + uint32_t offset = GetDexRegisterMapsOffset(encoding) + + inline_info.GetDexRegisterMapOffsetAtDepth(depth); + size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers); + return DexRegisterMap(region_.Subregion(offset, size)); + } } InlineInfo GetInlineInfoOf(StackMap stack_map, const StackMapEncoding& encoding) const { diff --git a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc new file mode 100644 index 0000000000..54879fbad9 --- /dev/null +++ b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <iostream> + +#include "base/casts.h" +#include "base/macros.h" +#include "java_vm_ext.h" +#include "jni_env_ext.h" +#include "thread-inl.h" + +namespace art { +namespace { + +static volatile std::atomic<bool> vm_was_shutdown(false); + +extern "C" JNIEXPORT void JNICALL Java_Main_waitAndCallIntoJniEnv(JNIEnv* env, jclass) { + // Wait until the runtime is shutdown. + while (!vm_was_shutdown.load()) { + usleep(1000); + } + std::cout << "About to call exception check\n"; + env->ExceptionCheck(); + LOG(ERROR) << "Should not be reached!"; +} + +// NO_RETURN does not work with extern "C" for target builds. +extern "C" JNIEXPORT void JNICALL Java_Main_destroyJavaVMAndExit(JNIEnv* env, jclass) { + // Fake up the managed stack so we can detach. 
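The function body that follows fakes up the managed stack, then detaches the thread and tears the VM down. Expressed through the documented JNI invocation API alone, the teardown sequence looks like this sketch, which omits the ART-internal cast the test actually uses:

#include <jni.h>

void DetachAndDestroy(JNIEnv* env) {
  JavaVM* vm = nullptr;
  env->GetJavaVM(&vm);        // Any JNIEnv can recover its owning JavaVM.
  vm->DetachCurrentThread();  // Release this thread's env before teardown.
  vm->DestroyJavaVM();        // Daemon threads are left running, which is
                              // exactly the situation this test exercises.
}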
+ Thread* const self = Thread::Current(); + self->SetTopOfStack(nullptr); + self->SetTopOfShadowStack(nullptr); + JavaVM* vm = down_cast<JNIEnvExt*>(env)->vm; + vm->DetachCurrentThread(); + vm->DestroyJavaVM(); + vm_was_shutdown.store(true); + // Give threads some time to get stuck in ExceptionCheck. + usleep(1000000); + if (env != nullptr) { + // Use env != nullptr to trick noreturn. + exit(0); + } +} + +} // namespace +} // namespace art diff --git a/test/136-daemon-jni-shutdown/expected.txt b/test/136-daemon-jni-shutdown/expected.txt new file mode 100644 index 0000000000..f0b6353e9f --- /dev/null +++ b/test/136-daemon-jni-shutdown/expected.txt @@ -0,0 +1,5 @@ +JNI_OnLoad called +About to call exception check +About to call exception check +About to call exception check +About to call exception check diff --git a/test/136-daemon-jni-shutdown/info.txt b/test/136-daemon-jni-shutdown/info.txt new file mode 100644 index 0000000000..06a12dff9e --- /dev/null +++ b/test/136-daemon-jni-shutdown/info.txt @@ -0,0 +1 @@ +Test that daemon threads that call into a JNI env after the runtime is shutdown do not crash.
\ No newline at end of file diff --git a/test/136-daemon-jni-shutdown/src/Main.java b/test/136-daemon-jni-shutdown/src/Main.java new file mode 100644 index 0000000000..6eceb757b1 --- /dev/null +++ b/test/136-daemon-jni-shutdown/src/Main.java @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Test that daemon threads that call into a JNI env after the runtime is shutdown do not crash. + */ +public class Main { + + public final static int THREAD_COUNT = 4; + + public static void main(String[] args) throws Exception { + System.loadLibrary(args[0]); + + for (int i = 0; i < THREAD_COUNT; i++) { + Thread t = new Thread(new DaemonRunnable()); + t.setDaemon(true); + t.start(); + } + // Give threads time to start and become stuck in waitAndCallIntoJniEnv. + Thread.sleep(1000); + destroyJavaVMAndExit(); + } + + static native void waitAndCallIntoJniEnv(); + static native void destroyJavaVMAndExit(); + + private static class DaemonRunnable implements Runnable { + public void run() { + for (;;) { + waitAndCallIntoJniEnv(); + } + } + } +} diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk index f74a516486..b922b4576e 100644 --- a/test/Android.libarttest.mk +++ b/test/Android.libarttest.mk @@ -30,6 +30,7 @@ LIBARTTEST_COMMON_SRC_FILES := \ 051-thread/thread_test.cc \ 117-nopatchoat/nopatchoat.cc \ 1337-gc-coverage/gc_coverage.cc \ + 136-daemon-jni-shutdown/daemon_jni_shutdown.cc \ 137-cfi/cfi.cc \ 139-register-natives/regnative.cc \ 141-class-unload/jni_unload.cc \ diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index ee6b7aa4f7..c9343d48cd 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -548,6 +548,11 @@ TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \ 484-checker-register-hints \ 537-checker-arraycopy +# Tests that should fail in the read barrier configuration with JIT. +# 141: Disabled because of intermittent failures on the ART Buildbot (b/25866001).
+TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS := \ + 141-class-unload + ifeq ($(ART_USE_READ_BARRIER),true) ifneq (,$(filter default,$(COMPILER_TYPES))) ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ @@ -562,10 +567,18 @@ ifeq ($(ART_USE_READ_BARRIER),true) $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ $(TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES)) endif + + ifneq (,$(filter jit,$(COMPILER_TYPES))) + ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ + $(PREBUILD_TYPES),jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \ + $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ + $(TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES)) + endif endif TEST_ART_BROKEN_DEFAULT_READ_BARRIER_RUN_TESTS := TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := +TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS := # Tests that should fail in the heap poisoning configuration with the default (Quick) compiler. # 137: Quick has no support for read barriers and punts to the @@ -873,20 +886,20 @@ define define-test-art-run-test ifeq ($(9),multiimage) test_groups += ART_RUN_TEST_$$(uc_host_or_target)_IMAGE_RULES run_test_options += --multi-image - ifeq ($(1),host) - prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13)) - else - prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13)) - endif + ifeq ($(1),host) + prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13)) + else + prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13)) + endif else ifeq ($(9),multipicimage) test_groups += ART_RUN_TEST_$$(uc_host_or_target)_PICIMAGE_RULES - run_test_options += --pic-image --multi-image - ifeq ($(1),host) - prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13)) - else - prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13)) - endif + run_test_options += --pic-image --multi-image + ifeq ($(1),host) + prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13)) + else + prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13)) + endif else $$(error found $(9) expected $(IMAGE_TYPES)) endif diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt index 3d25d5f807..2cb2c50257 100644 --- a/tools/libcore_failures_concurrent_collector.txt +++ b/tools/libcore_failures_concurrent_collector.txt @@ -24,10 +24,12 @@ bug: 26155567 }, { - description: "TimeoutException on host-x86-concurrent-collector", + description: "TimeoutException on host-{x86,x86-64}-concurrent-collector", result: EXEC_FAILED, - names: ["libcore.java.util.zip.ZipFileTest#testZipFileWithLotsOfEntries, - libcore.java.util.zip.ZipInputStreamTest#testLongMessage"], + modes: [host], + names: ["libcore.java.util.zip.GZIPOutputStreamTest#testSyncFlushEnabled", + "libcore.java.util.zip.ZipFileTest#testZipFileWithLotsOfEntries", + "libcore.java.util.zip.ZipInputStreamTest#testLongMessage"], bug: 26507762 } ]