Merge "Use optimizing's pic boot image in art script."
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 288bddd..b507124 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -300,9 +300,17 @@
   art_asflags += -DART_HEAP_POISONING=1
 endif
 
+#
+# Used to change the read barrier type. Valid values are BAKER, BROOKS, TABLELOOKUP.
+# The default is BAKER.
+#
+ART_READ_BARRIER_TYPE ?= BAKER
+
 ifeq ($(ART_USE_READ_BARRIER),true)
   art_cflags += -DART_USE_READ_BARRIER=1
+  art_cflags += -DART_READ_BARRIER_TYPE_IS_$(ART_READ_BARRIER_TYPE)=1
   art_asflags += -DART_USE_READ_BARRIER=1
+  art_asflags += -DART_READ_BARRIER_TYPE_IS_$(ART_READ_BARRIER_TYPE)=1
 endif
 
 ifeq ($(ART_USE_TLAB),true)
@@ -392,7 +400,6 @@
 art_non_debug_cflags :=
 art_host_non_debug_cflags :=
 art_target_non_debug_cflags :=
-art_default_gc_type :=
 art_default_gc_type_cflags :=
 
 ART_HOST_LDLIBS :=
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 71a55bb..9775f6a 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -351,6 +351,7 @@
 
 COMPILER_GTEST_HOST_SRC_FILES_mips64 := \
   $(COMPILER_GTEST_COMMON_SRC_FILES_mips64) \
+  compiler/utils/mips64/assembler_mips64_test.cc \
 
 COMPILER_GTEST_HOST_SRC_FILES_x86 := \
   $(COMPILER_GTEST_COMMON_SRC_FILES_x86) \
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 1b4d161..b60905d 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -404,6 +404,29 @@
   GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
 
+  // Some architectures, such as ARM and MIPS (prior to r6), have a
+  // conditional move instruction which only changes the target
+  // (output) register if the condition is true (MIPS prior to r6 had
+  // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always
+  // change the target (output) register.  If the condition is true the
+  // output register gets the contents of the "rs" register; otherwise,
+  // the output register is set to zero. One consequence of this is
+  // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6
+  // needs to use a pair of SELEQZ/SELNEZ instructions.  After
+  // executing this pair of instructions one of the output registers
+  // from the pair will necessarily contain zero. Then the code ORs the
+  // output registers from the SELEQZ/SELNEZ instructions to get the
+  // final result.
+  //
+  // The initial test to see if the output register is the same as the
+  // first input register is needed to make sure that value in the
+  // first input register isn't clobbered before we've finished
+  // computing the output value. The logic in the corresponding else
+  // clause performs the same task but makes sure the second input
+  // register isn't clobbered in the event that it's the same register
+  // as the output register; the else clause also handles the case
+  // where the output register is distinct from both the first and the
+  // second input registers.
   if (out == lhs) {
     __ Slt(AT, rhs, lhs);
     if (is_min) {
@@ -512,13 +535,12 @@
   CreateFPToFP(arena_, invoke);
 }
 
-// 0x200 - +zero
-// 0x040 - +infinity
-// 0x020 - -zero
-// 0x004 - -infinity
-// 0x002 - quiet NaN
-// 0x001 - signaling NaN
-const constexpr uint16_t CLASS_MASK = 0x267;
+const constexpr uint16_t kFPLeaveUnchanged = kPositiveZero |
+                                             kPositiveInfinity |
+                                             kNegativeZero |
+                                             kNegativeInfinity |
+                                             kQuietNaN |
+                                             kSignalingNaN;
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathFloor(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
@@ -534,7 +556,7 @@
   //     }
   __ ClassD(out, in);
   __ Dmfc1(AT, out);
-  __ Andi(AT, AT, CLASS_MASK);       // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN
+  __ Andi(AT, AT, kFPLeaveUnchanged);   // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN
   __ MovD(out, in);
   __ Bnezc(AT, &done);
 
@@ -583,7 +605,7 @@
   //     }
   __ ClassD(out, in);
   __ Dmfc1(AT, out);
-  __ Andi(AT, AT, CLASS_MASK);       // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN
+  __ Andi(AT, AT, kFPLeaveUnchanged);   // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN
   __ MovD(out, in);
   __ Bnezc(AT, &done);
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 3e982dc..5177b9a 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -310,9 +310,6 @@
 
   std::unique_ptr<std::ostream> visualizer_output_;
 
-  // Delegate to Quick in case the optimizing compiler cannot compile a method.
-  std::unique_ptr<Compiler> delegate_;
-
   DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler);
 };
 
@@ -321,11 +318,9 @@
 OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
     : Compiler(driver, kMaximumCompilationTimeBeforeWarning),
       run_optimizations_(
-          driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime),
-      delegate_(Create(driver, Compiler::Kind::kQuick)) {}
+          driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) {}
 
 void OptimizingCompiler::Init() {
-  delegate_->Init();
   // Enable C1visualizer output. Must be done in Init() because the compiler
   // driver is not fully initialized when passed to the compiler's constructor.
   CompilerDriver* driver = GetCompilerDriver();
@@ -344,7 +339,6 @@
 }
 
 void OptimizingCompiler::UnInit() const {
-  delegate_->UnInit();
 }
 
 OptimizingCompiler::~OptimizingCompiler() {
@@ -353,8 +347,7 @@
   }
 }
 
-void OptimizingCompiler::InitCompilationUnit(CompilationUnit& cu) const {
-  delegate_->InitCompilationUnit(cu);
+void OptimizingCompiler::InitCompilationUnit(CompilationUnit& cu ATTRIBUTE_UNUSED) const {
 }
 
 bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED,
@@ -862,15 +855,6 @@
     }
   }
 
-  if (method != nullptr) {
-    return method;
-  }
-  method = delegate_->Compile(code_item, access_flags, invoke_type, class_def_idx, method_idx,
-                              jclass_loader, dex_file, dex_cache);
-
-  if (method != nullptr) {
-    MaybeRecordStat(MethodCompilationStat::kCompiledQuick);
-  }
   return method;
 }
 
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index df45c8e..6375cf1 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -29,7 +29,6 @@
   kAttemptCompilation = 0,
   kCompiledBaseline,
   kCompiledOptimized,
-  kCompiledQuick,
   kInlinedInvoke,
   kInstructionSimplifications,
   kInstructionSimplificationsArch,
@@ -74,14 +73,11 @@
           compile_stats_[kCompiledBaseline] * 100 / compile_stats_[kAttemptCompilation];
       size_t optimized_percent =
           compile_stats_[kCompiledOptimized] * 100 / compile_stats_[kAttemptCompilation];
-      size_t quick_percent =
-          compile_stats_[kCompiledQuick] * 100 / compile_stats_[kAttemptCompilation];
       std::ostringstream oss;
       oss << "Attempted compilation of " << compile_stats_[kAttemptCompilation] << " methods: ";
 
       oss << unoptimized_percent << "% (" << compile_stats_[kCompiledBaseline] << ") unoptimized, ";
       oss << optimized_percent << "% (" << compile_stats_[kCompiledOptimized] << ") optimized, ";
-      oss << quick_percent << "% (" << compile_stats_[kCompiledQuick] << ") quick.";
 
       LOG(INFO) << oss.str();
 
@@ -100,7 +96,6 @@
       case kAttemptCompilation : return "kAttemptCompilation";
       case kCompiledBaseline : return "kCompiledBaseline";
       case kCompiledOptimized : return "kCompiledOptimized";
-      case kCompiledQuick : return "kCompiledQuick";
       case kInlinedInvoke : return "kInlinedInvoke";
       case kInstructionSimplifications: return "kInstructionSimplifications";
       case kInstructionSimplificationsArch: return "kInstructionSimplificationsArch";
diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc
index 2a0912e..4380596 100644
--- a/compiler/utils/arm/assembler_arm32_test.cc
+++ b/compiler/utils/arm/assembler_arm32_test.cc
@@ -287,7 +287,7 @@
       case 1:
         return Base::REG2_TOKEN;
       case 2:
-        return REG3_TOKEN;
+        return Base::REG3_TOKEN;
       case 3:
         return REG4_TOKEN;
       default:
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 017402d..bd994f4 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -92,6 +92,17 @@
         fmt);
   }
 
+  std::string RepeatRRR(void (Ass::*f)(Reg, Reg, Reg), std::string fmt) {
+    return RepeatTemplatedRegisters<Reg, Reg, Reg>(f,
+        GetRegisters(),
+        GetRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        fmt);
+  }
+
   std::string Repeatrb(void (Ass::*f)(Reg, Reg), std::string fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
@@ -118,6 +129,66 @@
     return RepeatRegisterImm<RegisterView::kUseSecondaryName>(f, imm_bytes, fmt);
   }
 
+  template <typename Reg1Type, typename Reg2Type, typename ImmType,
+            RegisterView Reg1View, RegisterView Reg2View>
+  std::string RepeatRegRegImmBits(void (Ass::*f)(Reg1Type, Reg2Type, ImmType),
+                                  int imm_bits,
+                                  std::string fmt) {
+    const std::vector<Reg1Type*> reg1_registers = GetRegisters();
+    const std::vector<Reg2Type*> reg2_registers = GetRegisters();
+    std::string str;
+    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), imm_bits > 0);
+
+    for (auto reg1 : reg1_registers) {
+      for (auto reg2 : reg2_registers) {
+        for (int64_t imm : imms) {
+          ImmType new_imm = CreateImmediate(imm);
+          (assembler_.get()->*f)(*reg1, *reg2, new_imm);
+          std::string base = fmt;
+
+          std::string reg1_string = GetRegName<Reg1View>(*reg1);
+          size_t reg1_index;
+          while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
+            base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
+          }
+
+          std::string reg2_string = GetRegName<Reg2View>(*reg2);
+          size_t reg2_index;
+          while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
+            base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
+          }
+
+          size_t imm_index = base.find(IMM_TOKEN);
+          if (imm_index != std::string::npos) {
+            std::ostringstream sreg;
+            sreg << imm;
+            std::string imm_string = sreg.str();
+            base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
+          }
+
+          if (str.size() > 0) {
+            str += "\n";
+          }
+          str += base;
+        }
+      }
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
+  template <typename Reg1Type, typename Reg2Type, typename ImmType>
+  std::string RepeatRRIb(void (Ass::*f)(Reg1Type, Reg2Type, ImmType),
+                         int imm_bits,
+                         std::string fmt) {
+    return RepeatRegRegImmBits<Reg1Type,
+                               Reg2Type,
+                               ImmType,
+                               RegisterView::kUsePrimaryName,
+                               RegisterView::kUsePrimaryName>(f, imm_bits, fmt);
+  }
+
   std::string RepeatFF(void (Ass::*f)(FPReg, FPReg), std::string fmt) {
     return RepeatTemplatedRegisters<FPReg, FPReg>(f,
                                                   GetFPRegisters(),
@@ -127,14 +198,27 @@
                                                   fmt);
   }
 
-  std::string RepeatFFI(void (Ass::*f)(FPReg, FPReg, const Imm&), size_t imm_bytes, std::string fmt) {
+  std::string RepeatFFF(void (Ass::*f)(FPReg, FPReg, FPReg), std::string fmt) {
+    return RepeatTemplatedRegisters<FPReg, FPReg, FPReg>(f,
+                                                         GetFPRegisters(),
+                                                         GetFPRegisters(),
+                                                         GetFPRegisters(),
+                                                         &AssemblerTest::GetFPRegName,
+                                                         &AssemblerTest::GetFPRegName,
+                                                         &AssemblerTest::GetFPRegName,
+                                                         fmt);
+  }
+
+  std::string RepeatFFI(void (Ass::*f)(FPReg, FPReg, const Imm&),
+                        size_t imm_bytes,
+                        std::string fmt) {
     return RepeatTemplatedRegistersImm<FPReg, FPReg>(f,
-                                                  GetFPRegisters(),
-                                                  GetFPRegisters(),
-                                                  &AssemblerTest::GetFPRegName,
-                                                  &AssemblerTest::GetFPRegName,
-                                                  imm_bytes,
-                                                  fmt);
+                                                     GetFPRegisters(),
+                                                     GetFPRegisters(),
+                                                     &AssemblerTest::GetFPRegName,
+                                                     &AssemblerTest::GetFPRegName,
+                                                     imm_bytes,
+                                                     fmt);
   }
 
   std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) {
@@ -339,6 +423,63 @@
     return res;
   }
 
+  const int kMaxBitsExhaustiveTest = 8;
+
+  // Create a couple of immediate values up to the number of bits given.
+  virtual std::vector<int64_t> CreateImmediateValuesBits(const int imm_bits, bool as_uint = false) {
+    CHECK_GT(imm_bits, 0);
+    CHECK_LE(imm_bits, 64);
+    std::vector<int64_t> res;
+
+    if (imm_bits <= kMaxBitsExhaustiveTest) {
+      if (as_uint) {
+        for (uint64_t i = MinInt<uint64_t>(imm_bits); i <= MaxInt<uint64_t>(imm_bits); i++) {
+          res.push_back(static_cast<int64_t>(i));
+        }
+      } else {
+        for (int64_t i = MinInt<int64_t>(imm_bits); i <= MaxInt<int64_t>(imm_bits); i++) {
+          res.push_back(i);
+        }
+      }
+    } else {
+      if (as_uint) {
+        for (uint64_t i = MinInt<uint64_t>(kMaxBitsExhaustiveTest);
+             i <= MaxInt<uint64_t>(kMaxBitsExhaustiveTest);
+             i++) {
+          res.push_back(static_cast<int64_t>(i));
+        }
+        for (int i = 0; i <= imm_bits; i++) {
+          uint64_t j = (MaxInt<uint64_t>(kMaxBitsExhaustiveTest) + 1) +
+                       ((MaxInt<uint64_t>(imm_bits) -
+                        (MaxInt<uint64_t>(kMaxBitsExhaustiveTest) + 1))
+                        * i / imm_bits);
+          res.push_back(static_cast<int64_t>(j));
+        }
+      } else {
+        for (int i = 0; i <= imm_bits; i++) {
+          int64_t j = MinInt<int64_t>(imm_bits) +
+                      ((((MinInt<int64_t>(kMaxBitsExhaustiveTest) - 1) -
+                         MinInt<int64_t>(imm_bits))
+                        * i) / imm_bits);
+          res.push_back(static_cast<int64_t>(j));
+        }
+        for (int64_t i = MinInt<int64_t>(kMaxBitsExhaustiveTest);
+             i <= MaxInt<int64_t>(kMaxBitsExhaustiveTest);
+             i++) {
+          res.push_back(static_cast<int64_t>(i));
+        }
+        for (int i = 0; i <= imm_bits; i++) {
+          int64_t j = (MaxInt<int64_t>(kMaxBitsExhaustiveTest) + 1) +
+                      ((MaxInt<int64_t>(imm_bits) - (MaxInt<int64_t>(kMaxBitsExhaustiveTest) + 1))
+                       * i / imm_bits);
+          res.push_back(static_cast<int64_t>(j));
+        }
+      }
+    }
+
+    return res;
+  }
+
   // Create an immediate from the specific value.
   virtual Imm CreateImmediate(int64_t imm_value) = 0;
 
@@ -406,6 +547,52 @@
     return str;
   }
 
+  template <typename Reg1, typename Reg2, typename Reg3>
+  std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2, Reg3),
+                                       const std::vector<Reg1*> reg1_registers,
+                                       const std::vector<Reg2*> reg2_registers,
+                                       const std::vector<Reg3*> reg3_registers,
+                                       std::string (AssemblerTest::*GetName1)(const Reg1&),
+                                       std::string (AssemblerTest::*GetName2)(const Reg2&),
+                                       std::string (AssemblerTest::*GetName3)(const Reg3&),
+                                       std::string fmt) {
+    std::string str;
+    for (auto reg1 : reg1_registers) {
+      for (auto reg2 : reg2_registers) {
+        for (auto reg3 : reg3_registers) {
+          (assembler_.get()->*f)(*reg1, *reg2, *reg3);
+          std::string base = fmt;
+
+          std::string reg1_string = (this->*GetName1)(*reg1);
+          size_t reg1_index;
+          while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
+            base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
+          }
+
+          std::string reg2_string = (this->*GetName2)(*reg2);
+          size_t reg2_index;
+          while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
+            base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
+          }
+
+          std::string reg3_string = (this->*GetName3)(*reg3);
+          size_t reg3_index;
+          while ((reg3_index = base.find(REG3_TOKEN)) != std::string::npos) {
+            base.replace(reg3_index, ConstexprStrLen(REG3_TOKEN), reg3_string);
+          }
+
+          if (str.size() > 0) {
+            str += "\n";
+          }
+          str += base;
+        }
+      }
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
   template <typename Reg1, typename Reg2>
   std::string RepeatTemplatedRegistersImm(void (Ass::*f)(Reg1, Reg2, const Imm&),
                                           const std::vector<Reg1*> reg1_registers,
@@ -500,6 +687,7 @@
   static constexpr const char* REG_TOKEN = "{reg}";
   static constexpr const char* REG1_TOKEN = "{reg1}";
   static constexpr const char* REG2_TOKEN = "{reg2}";
+  static constexpr const char* REG3_TOKEN = "{reg3}";
   static constexpr const char* IMM_TOKEN = "{imm}";
 
  private:
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index c170313..d083eb4 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -46,6 +46,20 @@
   kStoreDoubleword
 };
 
+// Used to test the values returned by ClassS/ClassD.
+enum FPClassMaskType {
+  kSignalingNaN      = 0x001,
+  kQuietNaN          = 0x002,
+  kNegativeInfinity  = 0x004,
+  kNegativeNormal    = 0x008,
+  kNegativeSubnormal = 0x010,
+  kNegativeZero      = 0x020,
+  kPositiveInfinity  = 0x040,
+  kPositiveNormal    = 0x080,
+  kPositiveSubnormal = 0x100,
+  kPositiveZero      = 0x200,
+};
+
 class Mips64Assembler FINAL : public Assembler {
  public:
   Mips64Assembler() {}
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
new file mode 100644
index 0000000..2071aca
--- /dev/null
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -0,0 +1,380 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_mips64.h"
+
+#include <inttypes.h>
+#include <map>
+#include <random>
+
+#include "base/bit_utils.h"
+#include "base/stl_util.h"
+#include "utils/assembler_test.h"
+
+namespace art {
+
+struct MIPS64CpuRegisterCompare {
+  bool operator()(const mips64::GpuRegister& a, const mips64::GpuRegister& b) const {
+    return a < b;
+  }
+};
+
+class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
+                                                 mips64::GpuRegister,
+                                                 mips64::FpuRegister,
+                                                 uint32_t> {
+ public:
+  typedef AssemblerTest<mips64::Mips64Assembler,
+                        mips64::GpuRegister,
+                        mips64::FpuRegister,
+                        uint32_t> Base;
+
+ protected:
+  // Get the typically used name for this architecture, e.g., aarch64, x86-64, ...
+  std::string GetArchitectureString() OVERRIDE {
+    return "mips64";
+  }
+
+  std::string GetAssemblerParameters() OVERRIDE {
+    return " --no-warn -march=mips64r6";
+  }
+
+  std::string GetDisassembleParameters() OVERRIDE {
+    return " -D -bbinary -mmips:isa64r6";
+  }
+
+  void SetUpHelpers() OVERRIDE {
+    if (registers_.size() == 0) {
+      registers_.push_back(new mips64::GpuRegister(mips64::ZERO));
+      registers_.push_back(new mips64::GpuRegister(mips64::AT));
+      registers_.push_back(new mips64::GpuRegister(mips64::V0));
+      registers_.push_back(new mips64::GpuRegister(mips64::V1));
+      registers_.push_back(new mips64::GpuRegister(mips64::A0));
+      registers_.push_back(new mips64::GpuRegister(mips64::A1));
+      registers_.push_back(new mips64::GpuRegister(mips64::A2));
+      registers_.push_back(new mips64::GpuRegister(mips64::A3));
+      registers_.push_back(new mips64::GpuRegister(mips64::A4));
+      registers_.push_back(new mips64::GpuRegister(mips64::A5));
+      registers_.push_back(new mips64::GpuRegister(mips64::A6));
+      registers_.push_back(new mips64::GpuRegister(mips64::A7));
+      registers_.push_back(new mips64::GpuRegister(mips64::T0));
+      registers_.push_back(new mips64::GpuRegister(mips64::T1));
+      registers_.push_back(new mips64::GpuRegister(mips64::T2));
+      registers_.push_back(new mips64::GpuRegister(mips64::T3));
+      registers_.push_back(new mips64::GpuRegister(mips64::S0));
+      registers_.push_back(new mips64::GpuRegister(mips64::S1));
+      registers_.push_back(new mips64::GpuRegister(mips64::S2));
+      registers_.push_back(new mips64::GpuRegister(mips64::S3));
+      registers_.push_back(new mips64::GpuRegister(mips64::S4));
+      registers_.push_back(new mips64::GpuRegister(mips64::S5));
+      registers_.push_back(new mips64::GpuRegister(mips64::S6));
+      registers_.push_back(new mips64::GpuRegister(mips64::S7));
+      registers_.push_back(new mips64::GpuRegister(mips64::T8));
+      registers_.push_back(new mips64::GpuRegister(mips64::T9));
+      registers_.push_back(new mips64::GpuRegister(mips64::K0));
+      registers_.push_back(new mips64::GpuRegister(mips64::K1));
+      registers_.push_back(new mips64::GpuRegister(mips64::GP));
+      registers_.push_back(new mips64::GpuRegister(mips64::SP));
+      registers_.push_back(new mips64::GpuRegister(mips64::S8));
+      registers_.push_back(new mips64::GpuRegister(mips64::RA));
+
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::ZERO), "zero");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::AT), "at");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::V0), "v0");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::V1), "v1");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::A0), "a0");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::A1), "a1");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::A2), "a2");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::A3), "a3");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::A4), "a4");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::A5), "a5");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::A6), "a6");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::A7), "a7");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::T0), "t0");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::T1), "t1");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::T2), "t2");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::T3), "t3");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::S0), "s0");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::S1), "s1");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::S2), "s2");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::S3), "s3");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::S4), "s4");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::S5), "s5");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::S6), "s6");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::S7), "s7");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::T8), "t8");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::T9), "t9");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::K0), "k0");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::K1), "k1");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::GP), "gp");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::SP), "sp");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::S8), "s8");
+      secondary_register_names_.emplace(mips64::GpuRegister(mips64::RA), "ra");
+
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F0));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F1));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F2));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F3));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F4));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F5));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F6));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F7));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F8));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F9));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F10));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F11));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F12));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F13));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F14));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F15));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F16));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F17));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F18));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F19));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F20));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F21));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F22));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F23));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F24));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F25));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F26));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F27));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F28));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F29));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F30));
+      fp_registers_.push_back(new mips64::FpuRegister(mips64::F31));
+    }
+  }
+
+  void TearDown() OVERRIDE {
+    AssemblerTest::TearDown();
+    STLDeleteElements(&registers_);
+    STLDeleteElements(&fp_registers_);
+  }
+
+  std::vector<mips64::GpuRegister*> GetRegisters() OVERRIDE {
+    return registers_;
+  }
+
+  std::vector<mips64::FpuRegister*> GetFPRegisters() OVERRIDE {
+    return fp_registers_;
+  }
+
+  uint32_t CreateImmediate(int64_t imm_value) OVERRIDE {
+    return imm_value;
+  }
+
+  std::string GetSecondaryRegisterName(const mips64::GpuRegister& reg) OVERRIDE {
+    CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end());
+    return secondary_register_names_[reg];
+  }
+
+ private:
+  std::vector<mips64::GpuRegister*> registers_;
+  std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_;
+
+  std::vector<mips64::FpuRegister*> fp_registers_;
+};
+
+
+TEST_F(AssemblerMIPS64Test, Toolchain) {
+  EXPECT_TRUE(CheckTools());
+}
+
+
+///////////////////
+// FP Operations //
+///////////////////
+
+TEST_F(AssemblerMIPS64Test, SqrtS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::SqrtS, "sqrt.s ${reg1}, ${reg2}"), "sqrt.s");
+}
+
+TEST_F(AssemblerMIPS64Test, SqrtD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::SqrtD, "sqrt.d ${reg1}, ${reg2}"), "sqrt.d");
+}
+
+TEST_F(AssemblerMIPS64Test, AbsS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::AbsS, "abs.s ${reg1}, ${reg2}"), "abs.s");
+}
+
+TEST_F(AssemblerMIPS64Test, AbsD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::AbsD, "abs.d ${reg1}, ${reg2}"), "abs.d");
+}
+
+TEST_F(AssemblerMIPS64Test, RoundLS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::RoundLS, "round.l.s ${reg1}, ${reg2}"), "round.l.s");
+}
+
+TEST_F(AssemblerMIPS64Test, RoundLD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::RoundLD, "round.l.d ${reg1}, ${reg2}"), "round.l.d");
+}
+
+TEST_F(AssemblerMIPS64Test, RoundWS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::RoundWS, "round.w.s ${reg1}, ${reg2}"), "round.w.s");
+}
+
+TEST_F(AssemblerMIPS64Test, RoundWD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::RoundWD, "round.w.d ${reg1}, ${reg2}"), "round.w.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CeilLS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::CeilLS, "ceil.l.s ${reg1}, ${reg2}"), "ceil.l.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CeilLD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::CeilLD, "ceil.l.d ${reg1}, ${reg2}"), "ceil.l.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CeilWS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::CeilWS, "ceil.w.s ${reg1}, ${reg2}"), "ceil.w.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CeilWD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::CeilWD, "ceil.w.d ${reg1}, ${reg2}"), "ceil.w.d");
+}
+
+TEST_F(AssemblerMIPS64Test, FloorLS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::FloorLS, "floor.l.s ${reg1}, ${reg2}"), "floor.l.s");
+}
+
+TEST_F(AssemblerMIPS64Test, FloorLD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::FloorLD, "floor.l.d ${reg1}, ${reg2}"), "floor.l.d");
+}
+
+TEST_F(AssemblerMIPS64Test, FloorWS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::FloorWS, "floor.w.s ${reg1}, ${reg2}"), "floor.w.s");
+}
+
+TEST_F(AssemblerMIPS64Test, FloorWD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::FloorWD, "floor.w.d ${reg1}, ${reg2}"), "floor.w.d");
+}
+
+TEST_F(AssemblerMIPS64Test, SelS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::SelS, "sel.s ${reg1}, ${reg2}, ${reg3}"), "sel.s");
+}
+
+TEST_F(AssemblerMIPS64Test, SelD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::SelD, "sel.d ${reg1}, ${reg2}, ${reg3}"), "sel.d");
+}
+
+TEST_F(AssemblerMIPS64Test, RintS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::RintS, "rint.s ${reg1}, ${reg2}"), "rint.s");
+}
+
+TEST_F(AssemblerMIPS64Test, RintD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::RintD, "rint.d ${reg1}, ${reg2}"), "rint.d");
+}
+
+TEST_F(AssemblerMIPS64Test, ClassS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::ClassS, "class.s ${reg1}, ${reg2}"), "class.s");
+}
+
+TEST_F(AssemblerMIPS64Test, ClassD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::ClassD, "class.d ${reg1}, ${reg2}"), "class.d");
+}
+
+TEST_F(AssemblerMIPS64Test, MinS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::MinS, "min.s ${reg1}, ${reg2}, ${reg3}"), "min.s");
+}
+
+TEST_F(AssemblerMIPS64Test, MinD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::MinD, "min.d ${reg1}, ${reg2}, ${reg3}"), "min.d");
+}
+
+TEST_F(AssemblerMIPS64Test, MaxS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::MaxS, "max.s ${reg1}, ${reg2}, ${reg3}"), "max.s");
+}
+
+TEST_F(AssemblerMIPS64Test, MaxD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::MaxD, "max.d ${reg1}, ${reg2}, ${reg3}"), "max.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CvtDL) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "cvt.d.l");
+}
+
+//////////
+// MISC //
+//////////
+
+TEST_F(AssemblerMIPS64Test, Bitswap) {
+  DriverStr(RepeatRR(&mips64::Mips64Assembler::Bitswap, "bitswap ${reg1}, ${reg2}"), "bitswap");
+}
+
+TEST_F(AssemblerMIPS64Test, Dbitswap) {
+  DriverStr(RepeatRR(&mips64::Mips64Assembler::Dbitswap, "dbitswap ${reg1}, ${reg2}"), "dbitswap");
+}
+
+TEST_F(AssemblerMIPS64Test, Dsbh) {
+  DriverStr(RepeatRR(&mips64::Mips64Assembler::Dsbh, "dsbh ${reg1}, ${reg2}"), "dsbh");
+}
+
+TEST_F(AssemblerMIPS64Test, Dshd) {
+  DriverStr(RepeatRR(&mips64::Mips64Assembler::Dshd, "dshd ${reg1}, ${reg2}"), "dshd");
+}
+
+TEST_F(AssemblerMIPS64Test, Wsbh) {
+  DriverStr(RepeatRR(&mips64::Mips64Assembler::Wsbh, "wsbh ${reg1}, ${reg2}"), "wsbh");
+}
+
+TEST_F(AssemblerMIPS64Test, Sc) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sc, -9, "sc ${reg1}, {imm}(${reg2})"), "sc");
+}
+
+TEST_F(AssemblerMIPS64Test, Scd) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Scd, -9, "scd ${reg1}, {imm}(${reg2})"), "scd");
+}
+
+TEST_F(AssemblerMIPS64Test, Ll) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Ll, -9, "ll ${reg1}, {imm}(${reg2})"), "ll");
+}
+
+TEST_F(AssemblerMIPS64Test, Lld) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lld, -9, "lld ${reg1}, {imm}(${reg2})"), "lld");
+}
+
+TEST_F(AssemblerMIPS64Test, Rotr) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Rotr, 5, "rotr ${reg1}, ${reg2}, {imm}"), "rotr");
+}
+
+TEST_F(AssemblerMIPS64Test, Seleqz) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Seleqz, "seleqz ${reg1}, ${reg2}, ${reg3}"),
+            "seleqz");
+}
+
+TEST_F(AssemblerMIPS64Test, Selnez) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Selnez, "selnez ${reg1}, ${reg2}, ${reg3}"),
+            "selnez");
+}
+
+TEST_F(AssemblerMIPS64Test, Clz) {
+  DriverStr(RepeatRR(&mips64::Mips64Assembler::Clz, "clz ${reg1}, ${reg2}"), "clz");
+}
+
+TEST_F(AssemblerMIPS64Test, Clo) {
+  DriverStr(RepeatRR(&mips64::Mips64Assembler::Clo, "clo ${reg1}, ${reg2}"), "clo");
+}
+
+TEST_F(AssemblerMIPS64Test, Dclz) {
+  DriverStr(RepeatRR(&mips64::Mips64Assembler::Dclz, "dclz ${reg1}, ${reg2}"), "dclz");
+}
+
+TEST_F(AssemblerMIPS64Test, Dclo) {
+  DriverStr(RepeatRR(&mips64::Mips64Assembler::Dclo, "dclo ${reg1}, ${reg2}"), "dclo");
+}
+
+}  // namespace art
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 059c4cd..8d81f2a 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -67,6 +67,7 @@
   gc/heap.cc \
   gc/reference_processor.cc \
   gc/reference_queue.cc \
+  gc/scoped_gc_critical_section.cc \
   gc/space/bump_pointer_space.cc \
   gc/space/dlmalloc_space.cc \
   gc/space/image_space.cc \
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index c3a5ce3..0d2457e 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -1970,11 +1970,11 @@
   for (size_t i = 0; i < arraysize(values); ++i) {
     // 64 bit FieldSet stores the set value in the second register.
     test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
-                            0U,
-                            values[i],
-                            StubTest::GetEntrypoint(self, kQuickSet64Static),
-                            self,
-                            referrer);
+                              0U,
+                              values[i],
+                              StubTest::GetEntrypoint(self, kQuickSet64Static),
+                              self,
+                              referrer);
 
     size_t res = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                            0U, 0U,
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 1b0d774..9c78ee5 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -263,6 +263,33 @@
                       : static_cast<unsigned_type>(value));
 }
 
+// Generate maximum/minimum values for signed/unsigned n-bit integers.
+template <typename T>
+static constexpr T MaxInt(size_t bits) {
+  return
+      DCHECK_CONSTEXPR(bits > 0, "bits cannot be zero", 0)
+      DCHECK_CONSTEXPR(bits <= BitSizeOf<T>(), "kBits must be < max.", 0)
+      bits == BitSizeOf<T>()
+          ? std::numeric_limits<T>::max()
+          : std::is_signed<T>::value
+                ? (bits == 1
+                       ? 0
+                       : static_cast<T>(MaxInt<typename std::make_unsigned<T>::type>(bits - 1)))
+                : static_cast<T>(UINT64_C(1) << bits) - static_cast<T>(1);
+}
+
+template <typename T>
+static constexpr T MinInt(size_t bits) {
+  return
+      DCHECK_CONSTEXPR(bits > 0, "bits cannot be zero", 0)
+      DCHECK_CONSTEXPR(bits <= BitSizeOf<T>(), "kBits must be < max.", 0)
+      bits == BitSizeOf<T>()
+          ? std::numeric_limits<T>::min()
+          : std::is_signed<T>::value
+                ? (bits == 1 ? -1 : static_cast<T>(-1) - MaxInt<T>(bits))
+                : static_cast<T>(0);
+}
+
 // Using the Curiously Recurring Template Pattern to implement everything shared
 // by LowToHighBitIterator and HighToLowBitIterator, i.e. everything but operator*().
 template <typename T, typename Iter>
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index b6ad547..beabce3 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -2463,6 +2463,9 @@
     ScopedCheck sc(kFlag_Default, __FUNCTION__);
     JniValueType args[2] = {{.E = env}, {.L = obj}};
     if (sc.Check(soa, true, "EL", args)) {
+      if (obj != nullptr) {
+        down_cast<JNIEnvExt*>(env)->RecordMonitorEnter(obj);
+      }
       JniValueType result;
       result.i = baseEnv(env)->MonitorEnter(env, obj);
       if (sc.Check(soa, false, "i", &result)) {
@@ -2477,6 +2480,9 @@
     ScopedCheck sc(kFlag_ExcepOkay, __FUNCTION__);
     JniValueType args[2] = {{.E = env}, {.L = obj}};
     if (sc.Check(soa, true, "EL", args)) {
+      if (obj != nullptr) {
+        down_cast<JNIEnvExt*>(env)->CheckMonitorRelease(obj);
+      }
       JniValueType result;
       result.i = baseEnv(env)->MonitorExit(env, obj);
       if (sc.Check(soa, false, "i", &result)) {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index b0590e2..acb39c5 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -79,6 +79,7 @@
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
 #include "thread-inl.h"
+#include "trace.h"
 #include "utils.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "verifier/method_verifier.h"
@@ -1299,6 +1300,9 @@
 }
 
 void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) {
+  // Acquire tracing_enabled before locking class linker lock to prevent lock order violation. Since
+  // enabling tracing requires the mutator lock, there are no race conditions here.
+  const bool tracing_enabled = Trace::IsTracingEnabled();
   Thread* const self = Thread::Current();
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
   BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(
@@ -1320,6 +1324,14 @@
     // Need to make sure to not copy ArtMethods without doing read barriers since the roots are
     // marked concurrently and we don't hold the classlinker_classes_lock_ when we do the copy.
     boot_class_table_.VisitRoots(buffered_visitor);
+
+    // If tracing is enabled, then mark all the class loaders to prevent unloading.
+    if (tracing_enabled) {
+      for (const ClassLoaderData& data : class_loaders_) {
+        GcRoot<mirror::Object> root(GcRoot<mirror::Object>(self->DecodeJObject(data.weak_root)));
+        root.VisitRoot(visitor, RootInfo(kRootVMInternal));
+      }
+    }
   } else if ((flags & kVisitRootFlagNewRoots) != 0) {
     for (auto& root : new_class_roots_) {
       mirror::Class* old_ref = root.Read<kWithoutReadBarrier>();
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 739403f..7f3e938 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -883,6 +883,7 @@
   friend class ImageWriter;  // for GetClassRoots
   friend class ImageDumper;  // for FindOpenedOatFileFromOatLocation
   friend class JniCompilerTest;  // for GetRuntimeQuickGenericJniStub
+  friend class JniInternalTest;  // for GetRuntimeQuickGenericJniStub
   ART_FRIEND_TEST(mirror::DexCacheTest, Open);  // for AllocDexCache
 
   DISALLOW_COPY_AND_ASSIGN(ClassLinker);
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index d24b4fb..b19381d 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -30,6 +30,7 @@
 #include "dex_instruction.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/allocation_record.h"
+#include "gc/scoped_gc_critical_section.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
 #include "handle_scope.h"
@@ -559,14 +560,15 @@
     return;
   }
 
+  Thread* const self = Thread::Current();
   {
     // TODO: dalvik only warned if there were breakpoints left over. clear in Dbg::Disconnected?
-    ReaderMutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
+    ReaderMutexLock mu(self, *Locks::breakpoint_lock_);
     CHECK_EQ(gBreakpoints.size(), 0U);
   }
 
   {
-    MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
+    MutexLock mu(self, *Locks::deoptimization_lock_);
     CHECK_EQ(deoptimization_requests_.size(), 0U);
     CHECK_EQ(full_deoptimization_event_count_, 0U);
     CHECK_EQ(dex_pc_change_event_ref_count_, 0U);
@@ -598,6 +600,10 @@
   Runtime* runtime = Runtime::Current();
   Thread* self = Thread::Current();
   {
+    // Required for DisableDeoptimization.
+    gc::ScopedGCCriticalSection gcs(self,
+                                    gc::kGcCauseInstrumentation,
+                                    gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa(__FUNCTION__);
     ThreadState old_state = self->SetStateUnsafe(kRunnable);
     // Debugger may not be active at this point.
@@ -3162,6 +3168,10 @@
   }
   CHECK_EQ(self->GetState(), kRunnable);
   ScopedThreadSuspension sts(self, kWaitingForDeoptimization);
+  // Required for ProcessDeoptimizationRequest.
+  gc::ScopedGCCriticalSection gcs(self,
+                                  gc::kGcCauseInstrumentation,
+                                  gc::kCollectorTypeInstrumentation);
   // We need to suspend mutator threads first.
   ScopedSuspendAll ssa(__FUNCTION__);
   const ThreadState old_state = self->SetStateUnsafe(kRunnable);
diff --git a/runtime/debugger.h b/runtime/debugger.h
index b4d42de..b3617e4 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -731,7 +731,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static void ProcessDeoptimizationRequest(const DeoptimizationRequest& request)
-      REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::mutator_lock_, Roles::uninterruptible_);
 
   static void RequestDeoptimizationLocked(const DeoptimizationRequest& req)
       REQUIRES(Locks::deoptimization_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index fc5c52e..58f256a 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -65,6 +65,9 @@
 static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JNIEnvExt* env = self->GetJniEnv();
+  if (UNLIKELY(env->check_jni)) {
+    env->CheckNoHeldMonitors();
+  }
   env->locals.SetSegmentState(env->local_ref_cookie);
   env->local_ref_cookie = saved_local_ref_cookie;
   self->PopHandleScope();
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index 95ba380..416510d 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -40,6 +40,8 @@
   kCollectorTypeHeapTrim,
   // A (mostly) concurrent copying collector.
   kCollectorTypeCC,
+  // Instrumentation critical section fake collector.
+  kCollectorTypeInstrumentation,
   // A homogeneous space compaction collector used in background transition
   // when both foreground and background collector are CMS.
   kCollectorTypeHomogeneousSpaceCompact,
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index 6be683d..84243df 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -33,6 +33,7 @@
     case kGcCauseDisableMovingGc: return "DisableMovingGc";
     case kGcCauseHomogeneousSpaceCompact: return "HomogeneousSpaceCompact";
     case kGcCauseTrim: return "HeapTrim";
+    case kGcCauseInstrumentation: return "Instrumentation";
     default:
       LOG(FATAL) << "Unreachable";
       UNREACHABLE();
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index 0536f32d..34c7766 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -39,6 +39,8 @@
   kGcCauseDisableMovingGc,
   // Not a real GC cause, used when we trim the heap.
   kGcCauseTrim,
+  // Not a real GC cause, used to implement exclusion between GC and instrumentation.
+  kGcCauseInstrumentation,
   // GC triggered for background transition when both foreground and background collector are CMS.
   kGcCauseHomogeneousSpaceCompact,
 };
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 7d664fa..657fcb5 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1312,6 +1312,13 @@
   ATRACE_END();
 }
 
+void Heap::StartGC(Thread* self, GcCause cause, CollectorType collector_type) {
+  MutexLock mu(self, *gc_complete_lock_);
+  // Ensure there is only one GC at a time.
+  WaitForGcToCompleteLocked(cause, self);
+  collector_type_running_ = collector_type;
+}
+
 void Heap::TrimSpaces(Thread* self) {
   {
     // Need to do this before acquiring the locks since we don't want to get suspended while
@@ -1319,10 +1326,7 @@
     ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
     // Pretend we are doing a GC to prevent background compaction from deleting the space we are
     // trimming.
-    MutexLock mu(self, *gc_complete_lock_);
-    // Ensure there is only one GC at a time.
-    WaitForGcToCompleteLocked(kGcCauseTrim, self);
-    collector_type_running_ = kCollectorTypeHeapTrim;
+    StartGC(self, kGcCauseTrim, kCollectorTypeHeapTrim);
   }
   ATRACE_BEGIN(__FUNCTION__);
   const uint64_t start_ns = NanoTime();
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index d0d0be3..cc48172 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -775,6 +775,8 @@
       REQUIRES(Locks::mutator_lock_);
 
   void LogGC(GcCause gc_cause, collector::GarbageCollector* collector);
+  void StartGC(Thread* self, GcCause cause, CollectorType collector_type)
+      REQUIRES(!*gc_complete_lock_);
   void FinishGC(Thread* self, collector::GcType gc_type) REQUIRES(!*gc_complete_lock_);
 
   // Create a mem map with a preferred base address.
@@ -1325,6 +1327,7 @@
   friend class collector::MarkSweep;
   friend class collector::SemiSpace;
   friend class ReferenceQueue;
+  friend class ScopedGCCriticalSection;
   friend class VerifyReferenceCardVisitor;
   friend class VerifyReferenceVisitor;
   friend class VerifyObjectVisitor;
diff --git a/runtime/gc/scoped_gc_critical_section.cc b/runtime/gc/scoped_gc_critical_section.cc
new file mode 100644
index 0000000..e7786a1
--- /dev/null
+++ b/runtime/gc/scoped_gc_critical_section.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "scoped_gc_critical_section.h"
+
+#include "gc/collector_type.h"
+#include "gc/heap.h"
+#include "runtime.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace gc {
+
+ScopedGCCriticalSection::ScopedGCCriticalSection(Thread* self,
+                                                 GcCause cause,
+                                                 CollectorType collector_type)
+    : self_(self) {
+  Runtime::Current()->GetHeap()->StartGC(self, cause, collector_type);
+  old_cause_ = self->StartAssertNoThreadSuspension("ScopedGCCriticalSection");
+}
+ScopedGCCriticalSection::~ScopedGCCriticalSection() {
+  self_->EndAssertNoThreadSuspension(old_cause_);
+  Runtime::Current()->GetHeap()->FinishGC(self_, collector::kGcTypeNone);
+}
+
+}  // namespace gc
+}  // namespace art
+
diff --git a/runtime/gc/scoped_gc_critical_section.h b/runtime/gc/scoped_gc_critical_section.h
new file mode 100644
index 0000000..ec93bca
--- /dev/null
+++ b/runtime/gc/scoped_gc_critical_section.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SCOPED_GC_CRITICAL_SECTION_H_
+#define ART_RUNTIME_GC_SCOPED_GC_CRITICAL_SECTION_H_
+
+#include "base/mutex.h"
+#include "collector_type.h"
+#include "gc_cause.h"
+
+namespace art {
+
+class Thread;
+
+namespace gc {
+
+// Wait until the GC is finished and then prevent GC from starting until the destructor. Used
+// to prevent deadlocks in places where we call ClassLinker::VisitClass with all the threads
+// suspended.
+class ScopedGCCriticalSection {
+ public:
+  ScopedGCCriticalSection(Thread* self, GcCause cause, CollectorType collector_type)
+      ACQUIRE(Roles::uninterruptible_);
+  ~ScopedGCCriticalSection() RELEASE(Roles::uninterruptible_);
+
+ private:
+  Thread* const self_;
+  const char* old_cause_;
+};
+
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SCOPED_GC_CRITICAL_SECTION_H_
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 8046056..612ca14 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -146,9 +146,13 @@
 
   // Deoptimization.
   void EnableDeoptimization()
-      REQUIRES(Locks::mutator_lock_, !deoptimized_methods_lock_);
+      REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!deoptimized_methods_lock_);
+  // Calls UndeoptimizeEverything which may visit class linker classes through ConfigureStubs.
   void DisableDeoptimization(const char* key)
-      REQUIRES(Locks::mutator_lock_, !deoptimized_methods_lock_);
+      REQUIRES(Locks::mutator_lock_, Roles::uninterruptible_)
+      REQUIRES(!deoptimized_methods_lock_);
+
   bool AreAllMethodsDeoptimized() const {
     return interpreter_stubs_installed_;
   }
@@ -156,12 +160,17 @@
 
   // Executes everything with interpreter.
   void DeoptimizeEverything(const char* key)
-      REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::classlinker_classes_lock_,
+      REQUIRES(Locks::mutator_lock_, Roles::uninterruptible_)
+      REQUIRES(!Locks::thread_list_lock_,
+               !Locks::classlinker_classes_lock_,
                !deoptimized_methods_lock_);
 
-  // Executes everything with compiled code (or interpreter if there is no code).
+  // Executes everything with compiled code (or interpreter if there is no code). May visit class
+  // linker classes through ConfigureStubs.
   void UndeoptimizeEverything(const char* key)
-      REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::classlinker_classes_lock_,
+      REQUIRES(Locks::mutator_lock_, Roles::uninterruptible_)
+      REQUIRES(!Locks::thread_list_lock_,
+               !Locks::classlinker_classes_lock_,
                !deoptimized_methods_lock_);
 
   // Deoptimize a method by forcing its execution with the interpreter. Nevertheless, a static
@@ -183,12 +192,16 @@
   // Enable method tracing by installing instrumentation entry/exit stubs or interpreter.
   void EnableMethodTracing(const char* key,
                            bool needs_interpreter = kDeoptimizeForAccurateMethodEntryExitListeners)
-      REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::classlinker_classes_lock_,
+      REQUIRES(Locks::mutator_lock_, Roles::uninterruptible_)
+      REQUIRES(!Locks::thread_list_lock_,
+               !Locks::classlinker_classes_lock_,
                !deoptimized_methods_lock_);
 
   // Disable method tracing by uninstalling instrumentation entry/exit stubs or interpreter.
   void DisableMethodTracing(const char* key)
-      REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::classlinker_classes_lock_,
+      REQUIRES(Locks::mutator_lock_, Roles::uninterruptible_)
+      REQUIRES(!Locks::thread_list_lock_,
+               !Locks::classlinker_classes_lock_,
                !deoptimized_methods_lock_);
 
   InterpreterHandlerTable GetInterpreterHandlerTable() const
@@ -393,7 +406,9 @@
   // instrumentation level it needs. Therefore the current instrumentation level
   // becomes the highest instrumentation level required by a client.
   void ConfigureStubs(const char* key, InstrumentationLevel desired_instrumentation_level)
-      REQUIRES(Locks::mutator_lock_, !deoptimized_methods_lock_, !Locks::thread_list_lock_,
+      REQUIRES(Locks::mutator_lock_, Roles::uninterruptible_)
+      REQUIRES(!deoptimized_methods_lock_,
+               !Locks::thread_list_lock_,
                !Locks::classlinker_classes_lock_);
 
   void UpdateInterpreterHandlerTable() REQUIRES(Locks::mutator_lock_) {
diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc
index d98d246..e4688a2 100644
--- a/runtime/instrumentation_test.cc
+++ b/runtime/instrumentation_test.cc
@@ -20,6 +20,7 @@
 #include "common_throws.h"
 #include "class_linker-inl.h"
 #include "dex_file.h"
+#include "gc/scoped_gc_critical_section.h"
 #include "handle_scope-inl.h"
 #include "jvalue.h"
 #include "runtime.h"
@@ -151,6 +152,9 @@
     ScopedObjectAccess soa(Thread::Current());
     instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
     ScopedThreadSuspension sts(soa.Self(), kSuspended);
+    gc::ScopedGCCriticalSection gcs(soa.Self(),
+                                    gc::kGcCauseInstrumentation,
+                                    gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa("Instrumentation::ConfigureStubs");
     instr->ConfigureStubs(key, level);
   }
@@ -203,6 +207,9 @@
     Runtime* runtime = Runtime::Current();
     instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
     ScopedThreadSuspension sts(self, kSuspended);
+    gc::ScopedGCCriticalSection gcs(self,
+                                    gc::kGcCauseInstrumentation,
+                                    gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa("Single method deoptimization");
     if (enable_deoptimization) {
       instrumentation->EnableDeoptimization();
@@ -216,6 +223,9 @@
     Runtime* runtime = Runtime::Current();
     instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
     ScopedThreadSuspension sts(self, kSuspended);
+    gc::ScopedGCCriticalSection gcs(self,
+                                    gc::kGcCauseInstrumentation,
+                                    gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa("Single method undeoptimization");
     instrumentation->Undeoptimize(method);
     if (disable_deoptimization) {
@@ -228,6 +238,9 @@
     Runtime* runtime = Runtime::Current();
     instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
     ScopedThreadSuspension sts(self, kSuspended);
+    gc::ScopedGCCriticalSection gcs(self,
+                                    gc::kGcCauseInstrumentation,
+                                    gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa("Full deoptimization");
     if (enable_deoptimization) {
       instrumentation->EnableDeoptimization();
@@ -240,6 +253,9 @@
     Runtime* runtime = Runtime::Current();
     instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
     ScopedThreadSuspension sts(self, kSuspended);
+    gc::ScopedGCCriticalSection gcs(self,
+                                    gc::kGcCauseInstrumentation,
+                                    gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa("Full undeoptimization");
     instrumentation->UndeoptimizeEverything(key);
     if (disable_deoptimization) {
@@ -252,6 +268,9 @@
     Runtime* runtime = Runtime::Current();
     instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
     ScopedThreadSuspension sts(self, kSuspended);
+    gc::ScopedGCCriticalSection gcs(self,
+                                    gc::kGcCauseInstrumentation,
+                                    gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa("EnableMethodTracing");
     instrumentation->EnableMethodTracing(key, needs_interpreter);
   }
@@ -261,6 +280,9 @@
     Runtime* runtime = Runtime::Current();
     instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
     ScopedThreadSuspension sts(self, kSuspended);
+    gc::ScopedGCCriticalSection gcs(self,
+                                    gc::kGcCauseInstrumentation,
+                                    gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa("EnableMethodTracing");
     instrumentation->DisableMethodTracing(key);
   }
diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc
index b18b430..4104d7a 100644
--- a/runtime/jni_env_ext.cc
+++ b/runtime/jni_env_ext.cc
@@ -16,10 +16,17 @@
 
 #include "jni_env_ext.h"
 
+#include <algorithm>
+#include <vector>
+
 #include "check_jni.h"
 #include "indirect_reference_table.h"
 #include "java_vm_ext.h"
 #include "jni_internal.h"
+#include "lock_word.h"
+#include "mirror/object-inl.h"
+#include "nth_caller_visitor.h"
+#include "thread-inl.h"
 
 namespace art {
 
@@ -63,14 +70,14 @@
 JNIEnvExt::~JNIEnvExt() {
 }
 
-jobject JNIEnvExt::NewLocalRef(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
+jobject JNIEnvExt::NewLocalRef(mirror::Object* obj) {
   if (obj == nullptr) {
     return nullptr;
   }
   return reinterpret_cast<jobject>(locals.Add(local_ref_cookie, obj));
 }
 
-void JNIEnvExt::DeleteLocalRef(jobject obj) SHARED_REQUIRES(Locks::mutator_lock_) {
+void JNIEnvExt::DeleteLocalRef(jobject obj) {
   if (obj != nullptr) {
     locals.Remove(local_ref_cookie, reinterpret_cast<IndirectRef>(obj));
   }
@@ -86,14 +93,14 @@
   monitors.Dump(os);
 }
 
-void JNIEnvExt::PushFrame(int capacity) SHARED_REQUIRES(Locks::mutator_lock_) {
+void JNIEnvExt::PushFrame(int capacity) {
   UNUSED(capacity);  // cpplint gets confused with (int) and thinks its a cast.
   // TODO: take 'capacity' into account.
   stacked_local_ref_cookies.push_back(local_ref_cookie);
   local_ref_cookie = locals.GetSegmentState();
 }
 
-void JNIEnvExt::PopFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+void JNIEnvExt::PopFrame() {
   locals.SetSegmentState(local_ref_cookie);
   local_ref_cookie = stacked_local_ref_cookies.back();
   stacked_local_ref_cookies.pop_back();
@@ -104,4 +111,118 @@
                 IndirectReferenceTable::SegmentStateOffset().Int32Value());
 }
 
+// Use some defining part of the caller's frame as the identifying mark for the JNI segment.
+static uintptr_t GetJavaCallFrame(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+  NthCallerVisitor zeroth_caller(self, 0, false);
+  zeroth_caller.WalkStack();
+  if (zeroth_caller.caller == nullptr) {
+    // No Java code, must be from pure native code.
+    return 0;
+  } else if (zeroth_caller.GetCurrentQuickFrame() == nullptr) {
+    // Shadow frame = interpreter. Use the actual shadow frame's address.
+    DCHECK(zeroth_caller.GetCurrentShadowFrame() != nullptr);
+    return reinterpret_cast<uintptr_t>(zeroth_caller.GetCurrentShadowFrame());
+  } else {
+    // Quick frame = compiled code. Use the bottom of the frame.
+    return reinterpret_cast<uintptr_t>(zeroth_caller.GetCurrentQuickFrame());
+  }
+}
+
+void JNIEnvExt::RecordMonitorEnter(jobject obj) {
+  locked_objects_.push_back(std::make_pair(GetJavaCallFrame(self), obj));
+}
+
+static std::string ComputeMonitorDescription(Thread* self,
+                                             jobject obj) SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::Object* o = self->DecodeJObject(obj);
+  if ((o->GetLockWord(false).GetState() == LockWord::kThinLocked) &&
+      Locks::mutator_lock_->IsExclusiveHeld(self)) {
+    // Getting the identity hashcode here would result in lock inflation and suspension of the
+    // current thread, which isn't safe if this is the only runnable thread.
+    return StringPrintf("<@addr=0x%" PRIxPTR "> (a %s)",
+                        reinterpret_cast<uintptr_t>(o),
+                        PrettyTypeOf(o).c_str());
+  } else {
+    // IdentityHashCode can cause thread suspension, which would invalidate o if it moved. So
+    // we get the pretty type before we call IdentityHashCode.
+    const std::string pretty_type(PrettyTypeOf(o));
+    return StringPrintf("<0x%08x> (a %s)", o->IdentityHashCode(), pretty_type.c_str());
+  }
+}
+
+static void RemoveMonitors(Thread* self,
+                           uintptr_t frame,
+                           ReferenceTable* monitors,
+                           std::vector<std::pair<uintptr_t, jobject>>* locked_objects)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  auto kept_end = std::remove_if(
+      locked_objects->begin(),
+      locked_objects->end(),
+      [self, frame, monitors](const std::pair<uintptr_t, jobject>& pair)
+          SHARED_REQUIRES(Locks::mutator_lock_) {
+        if (frame == pair.first) {
+          mirror::Object* o = self->DecodeJObject(pair.second);
+          monitors->Remove(o);
+          return true;
+        }
+        return false;
+      });
+  locked_objects->erase(kept_end, locked_objects->end());
+}
+
+void JNIEnvExt::CheckMonitorRelease(jobject obj) {
+  uintptr_t current_frame = GetJavaCallFrame(self);
+  std::pair<uintptr_t, jobject> exact_pair = std::make_pair(current_frame, obj);
+  auto it = std::find(locked_objects_.begin(), locked_objects_.end(), exact_pair);
+  bool will_abort = false;
+  if (it != locked_objects_.end()) {
+    locked_objects_.erase(it);
+  } else {
+    // Check whether this monitor was locked in another JNI "session."
+    mirror::Object* mirror_obj = self->DecodeJObject(obj);
+    for (std::pair<uintptr_t, jobject>& pair : locked_objects_) {
+      if (self->DecodeJObject(pair.second) == mirror_obj) {
+        std::string monitor_descr = ComputeMonitorDescription(self, pair.second);
+        vm->JniAbortF("<JNI MonitorExit>",
+                      "Unlocking monitor that wasn't locked here: %s",
+                      monitor_descr.c_str());
+        will_abort = true;
+        break;
+      }
+    }
+  }
+
+  // When we abort, also make sure that any locks from the current "session" are removed from
+  // the monitors table, otherwise we may visit local objects in GC during abort (which won't be
+  // valid anymore).
+  if (will_abort) {
+    RemoveMonitors(self, current_frame, &monitors, &locked_objects_);
+  }
+}
+
+void JNIEnvExt::CheckNoHeldMonitors() {
+  uintptr_t current_frame = GetJavaCallFrame(self);
+  // The locked_objects_ are grouped by their stack frame component, as this enforces structured
+  // locking, and the groups form a stack. So the current frame entries are at the end. Check
+  // whether the vector is empty, and when there are elements, whether the last element belongs
+  // to this call - this signals that monitors locked in this call were not unlocked.
+  if (!locked_objects_.empty()) {
+    std::pair<uintptr_t, jobject>& pair = locked_objects_[locked_objects_.size() - 1];
+    if (pair.first == current_frame) {
+      std::string monitor_descr = ComputeMonitorDescription(self, pair.second);
+      vm->JniAbortF("<JNI End>",
+                    "Still holding a locked object on JNI end: %s",
+                    monitor_descr.c_str());
+      // When we abort, also make sure that any locks from the current "session" are removed from
+      // the monitors table, otherwise we may visit local objects in GC during abort.
+      RemoveMonitors(self, current_frame, &monitors, &locked_objects_);
+    } else if (kIsDebugBuild) {
+      // Make sure there are really no other entries and our checking worked as expected.
+      for (std::pair<uintptr_t, jobject>& check_pair : locked_objects_) {
+        CHECK_NE(check_pair.first, current_frame);
+      }
+    }
+  }
+}
+
 }  // namespace art
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index 9b55536..3828ff0 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -43,8 +43,8 @@
 
   void SetCheckJniEnabled(bool enabled);
 
-  void PushFrame(int capacity);
-  void PopFrame();
+  void PushFrame(int capacity) SHARED_REQUIRES(Locks::mutator_lock_);
+  void PopFrame() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<typename T>
   T AddLocalReference(mirror::Object* obj)
@@ -89,10 +89,27 @@
   // Used by -Xcheck:jni.
   const JNINativeInterface* unchecked_functions;
 
+  // Functions to keep track of monitor lock and unlock operations. Used to ensure proper locking
+  // rules in CheckJNI mode.
+
+  // Record locking of a monitor.
+  void RecordMonitorEnter(jobject obj) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Check the release, that is, that the release is performed in the same JNI "segment."
+  void CheckMonitorRelease(jobject obj) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Check that no monitors are held that have been acquired in this JNI "segment."
+  void CheckNoHeldMonitors() SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   // The constructor should not be called directly. It may leave the object in an erronuous state,
   // and the result needs to be checked.
   JNIEnvExt(Thread* self, JavaVMExt* vm);
+
+  // All locked objects, with the (Java caller) stack frame that locked them. Used in CheckJNI
+  // to ensure that only monitors locked in this native frame are being unlocked, and that at
+  // the end all are unlocked.
+  std::vector<std::pair<uintptr_t, jobject>> locked_objects_;
 };
 
 // Used to save and restore the JNIEnvExt state when not going through code created by the JNI
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 2a0cb28..41b368e 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -607,11 +607,64 @@
     EXPECT_EQ(check_jni, vm_->SetCheckJniEnabled(old_check_jni));
   }
 
+  void SetUpForTest(bool direct, const char* method_name, const char* method_sig,
+                    void* native_fnptr) {
+    // Initialize class loader and set generic JNI entrypoint.
+    // Note: this code is adapted from the jni_compiler_test, and taken with minimal modifications.
+    if (!runtime_->IsStarted()) {
+      {
+        ScopedObjectAccess soa(Thread::Current());
+        class_loader_ = LoadDex("MyClassNatives");
+        StackHandleScope<1> hs(soa.Self());
+        Handle<mirror::ClassLoader> loader(
+            hs.NewHandle(soa.Decode<mirror::ClassLoader*>(class_loader_)));
+        mirror::Class* c = class_linker_->FindClass(soa.Self(), "LMyClassNatives;", loader);
+        const auto pointer_size = class_linker_->GetImagePointerSize();
+        ArtMethod* method = direct ? c->FindDirectMethod(method_name, method_sig, pointer_size) :
+            c->FindVirtualMethod(method_name, method_sig, pointer_size);
+        ASSERT_TRUE(method != nullptr) << method_name << " " << method_sig;
+        method->SetEntryPointFromQuickCompiledCode(class_linker_->GetRuntimeQuickGenericJniStub());
+      }
+      // Start runtime.
+      Thread::Current()->TransitionFromSuspendedToRunnable();
+      bool started = runtime_->Start();
+      CHECK(started);
+    }
+    // JNI operations after runtime start.
+    env_ = Thread::Current()->GetJniEnv();
+    jklass_ = env_->FindClass("MyClassNatives");
+    ASSERT_TRUE(jklass_ != nullptr) << method_name << " " << method_sig;
+
+    if (direct) {
+      jmethod_ = env_->GetStaticMethodID(jklass_, method_name, method_sig);
+    } else {
+      jmethod_ = env_->GetMethodID(jklass_, method_name, method_sig);
+    }
+    ASSERT_TRUE(jmethod_ != nullptr) << method_name << " " << method_sig;
+
+    if (native_fnptr != nullptr) {
+      JNINativeMethod methods[] = { { method_name, method_sig, native_fnptr } };
+      ASSERT_EQ(JNI_OK, env_->RegisterNatives(jklass_, methods, 1))
+          << method_name << " " << method_sig;
+    } else {
+      env_->UnregisterNatives(jklass_);
+    }
+
+    jmethodID constructor = env_->GetMethodID(jklass_, "<init>", "()V");
+    jobj_ = env_->NewObject(jklass_, constructor);
+    ASSERT_TRUE(jobj_ != nullptr) << method_name << " " << method_sig;
+  }
+
   JavaVMExt* vm_;
   JNIEnv* env_;
   jclass aioobe_;
   jclass ase_;
   jclass sioobe_;
+
+  jclass jklass_;
+  jobject jobj_;
+  jobject class_loader_;
+  jmethodID jmethod_;
 };
 
 TEST_F(JniInternalTest, AllocObject) {
@@ -2111,4 +2164,38 @@
   }
 }
 
+void Java_MyClassNatives_foo_exit(JNIEnv* env, jobject thisObj) {
+  // Release the monitor on self. This should trigger an abort.
+  env->MonitorExit(thisObj);
+}
+
+TEST_F(JniInternalTest, MonitorExitLockedInDifferentCall) {
+  SetUpForTest(false, "foo", "()V", reinterpret_cast<void*>(&Java_MyClassNatives_foo_exit));
+  ASSERT_NE(jobj_, nullptr);
+
+  env_->MonitorEnter(jobj_);
+  EXPECT_FALSE(env_->ExceptionCheck());
+
+  CheckJniAbortCatcher check_jni_abort_catcher;
+  env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
+  check_jni_abort_catcher.Check("Unlocking monitor that wasn't locked here");
+}
+
+void Java_MyClassNatives_foo_enter_no_exit(JNIEnv* env, jobject thisObj) {
+  // Acquire but don't release the monitor on self. This should trigger an abort on return.
+  env->MonitorEnter(thisObj);
+}
+
+TEST_F(JniInternalTest, MonitorExitNotAllUnlocked) {
+  SetUpForTest(false,
+               "foo",
+               "()V",
+               reinterpret_cast<void*>(&Java_MyClassNatives_foo_enter_no_exit));
+  ASSERT_NE(jobj_, nullptr);
+
+  CheckJniAbortCatcher check_jni_abort_catcher;
+  env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
+  check_jni_abort_catcher.Check("Still holding a locked object on JNI end");
+}
+
 }  // namespace art
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 3b84bfa..4aebc2c 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -192,28 +192,38 @@
   }
 }
 
-static void DexFile_closeDexFile(JNIEnv* env, jclass, jobject cookie) {
-  std::unique_ptr<std::vector<const DexFile*>> dex_files = ConvertJavaArrayToNative(env, cookie);
-  if (dex_files.get() == nullptr) {
-    DCHECK(env->ExceptionCheck());
-    return;
-  }
-
+static jboolean DexFile_closeDexFile(JNIEnv* env, jclass, jobject cookie) {
   ScopedObjectAccess soa(env);
+  mirror::Object* dex_files_object = soa.Decode<mirror::Object*>(cookie);
+  if (dex_files_object == nullptr) {
+    ThrowNullPointerException("cookie == null");
+    return JNI_FALSE;
+  }
+  mirror::LongArray* dex_files = dex_files_object->AsLongArray();
 
-  // The Runtime currently never unloads classes, which means any registered
-  // dex files must be kept around forever in case they are used. We
-  // accomplish this here by explicitly leaking those dex files that are
-  // registered.
-  //
-  // TODO: The Runtime should support unloading of classes and freeing of the
-  // dex files for those unloaded classes rather than leaking dex files here.
+  // Delete dex files associated with this dalvik.system.DexFile since there should not be running
+  // code using it. dex_files is a vector due to multidex.
   ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-  for (const DexFile* dex_file : *dex_files) {
+  bool all_deleted = true;
+  for (int32_t i = 0, count = dex_files->GetLength(); i < count; ++i) {
+    auto* dex_file = reinterpret_cast<DexFile*>(dex_files->Get(i));
+    if (dex_file == nullptr) {
+      continue;
+    }
+    // Only delete the dex file if the dex cache is not found to prevent runtime crashes if there
+    // are calls to DexFile.close while the ART DexFile is still in use.
     if (class_linker->FindDexCache(soa.Self(), *dex_file, true) == nullptr) {
+      // Clear the element in the array so that we can call close again.
+      dex_files->Set(i, 0);
       delete dex_file;
+    } else {
+      all_deleted = false;
     }
   }
+
+  // TODO: Also unmap the OatFile for this dalvik.system.DexFile.
+
+  return all_deleted ? JNI_TRUE : JNI_FALSE;
 }
 
 static jclass DexFile_defineClassNative(JNIEnv* env, jclass, jstring javaName, jobject javaLoader,
@@ -379,7 +389,7 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(DexFile, closeDexFile, "(Ljava/lang/Object;)V"),
+  NATIVE_METHOD(DexFile, closeDexFile, "(Ljava/lang/Object;)Z"),
   NATIVE_METHOD(DexFile, defineClassNative,
                 "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/Object;)Ljava/lang/Class;"),
   NATIVE_METHOD(DexFile, getClassNameList, "(Ljava/lang/Object;)[Ljava/lang/String;"),
diff --git a/runtime/read_barrier_c.h b/runtime/read_barrier_c.h
index 710c21f..8e5b187 100644
--- a/runtime/read_barrier_c.h
+++ b/runtime/read_barrier_c.h
@@ -26,10 +26,16 @@
 // table-lookup read barriers.
 
 #ifdef ART_USE_READ_BARRIER
+#if ART_READ_BARRIER_TYPE_IS_BAKER
 #define USE_BAKER_READ_BARRIER
-// #define USE_BROOKS_READ_BARRIER
-// #define USE_TABLE_LOOKUP_READ_BARRIER
+#elif ART_READ_BARRIER_TYPE_IS_BROOKS
+#define USE_BROOKS_READ_BARRIER
+#elif ART_READ_BARRIER_TYPE_IS_TABLELOOKUP
+#define USE_TABLE_LOOKUP_READ_BARRIER
+#else
+#error "ART read barrier type must be set"
 #endif
+#endif  // ART_USE_READ_BARRIER
 
 #ifdef ART_HEAP_POISONING
 #define USE_HEAP_POISONING
diff --git a/runtime/trace.cc b/runtime/trace.cc
index e2743ce..745aa63 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -31,6 +31,7 @@
 #include "common_throws.h"
 #include "debugger.h"
 #include "dex_file-inl.h"
+#include "gc/scoped_gc_critical_section.h"
 #include "instrumentation.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
@@ -350,6 +351,10 @@
 
   // Create Trace object.
   {
+    // Required since EnableMethodTracing calls ConfigureStubs which visits class linker classes.
+    gc::ScopedGCCriticalSection gcs(self,
+                                    gc::kGcCauseInstrumentation,
+                                    gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa(__FUNCTION__);
     MutexLock mu(self, *Locks::trace_lock_);
     if (the_trace_ != nullptr) {
@@ -464,9 +469,10 @@
   Runtime* runtime = Runtime::Current();
   Trace* the_trace = nullptr;
 
+  Thread* const self = Thread::Current();
   pthread_t sampling_pthread = 0U;
   {
-    MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+    MutexLock mu(self, *Locks::trace_lock_);
     if (the_trace_ == nullptr) {
       LOG(ERROR) << "Trace pause requested, but no trace currently running";
       return;
@@ -478,23 +484,26 @@
 
   if (sampling_pthread != 0U) {
     {
-      MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+      MutexLock mu(self, *Locks::trace_lock_);
       the_trace_ = nullptr;
     }
     CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, nullptr), "sampling thread shutdown");
     sampling_pthread_ = 0U;
     {
-      MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+      MutexLock mu(self, *Locks::trace_lock_);
       the_trace_ = the_trace;
     }
   }
 
   if (the_trace != nullptr) {
+    gc::ScopedGCCriticalSection gcs(self,
+                                    gc::kGcCauseInstrumentation,
+                                    gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa(__FUNCTION__);
     stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0;
 
     if (the_trace->trace_mode_ == TraceMode::kSampling) {
-      MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+      MutexLock mu(self, *Locks::thread_list_lock_);
       runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
     } else {
       runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
@@ -530,6 +539,9 @@
   bool enable_stats = (the_trace->flags_ && kTraceCountAllocs) != 0;
 
   {
+    gc::ScopedGCCriticalSection gcs(self,
+                                    gc::kGcCauseInstrumentation,
+                                    gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa(__FUNCTION__);
 
     // Reenable.
@@ -1046,4 +1058,9 @@
   return the_trace_->buffer_size_;
 }
 
+bool Trace::IsTracingEnabled() {
+  MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+  return the_trace_ != nullptr;
+}
+
 }  // namespace art
diff --git a/runtime/trace.h b/runtime/trace.h
index 87a691d..356a81f 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -183,6 +183,9 @@
   static TraceMode GetMode() REQUIRES(!Locks::trace_lock_);
   static size_t GetBufferSize() REQUIRES(!Locks::trace_lock_);
 
+  // Used by class linker to prevent class unloading.
+  static bool IsTracingEnabled() REQUIRES(!Locks::trace_lock_);
+
  private:
   Trace(File* trace_file, const char* trace_name, size_t buffer_size, int flags,
         TraceOutputMode output_mode, TraceMode trace_mode);
diff --git a/test/087-gc-after-link/src/Main.java b/test/087-gc-after-link/src/Main.java
index e0a187a..2f6d496 100644
--- a/test/087-gc-after-link/src/Main.java
+++ b/test/087-gc-after-link/src/Main.java
@@ -155,6 +155,12 @@
      * See if we can GC after a failed load.
      */
     static void testFailLoadAndGc() throws TestFailed {
+        processFailLoadAndGc();
+        Runtime.getRuntime().gc();
+        System.out.println("GC complete.");
+    }
+
+    private static void processFailLoadAndGc() throws TestFailed {
         try {
             BrokenDexLoader loader;
 
@@ -170,7 +176,5 @@
                 ite.printStackTrace();
             }
         }
-        Runtime.getRuntime().gc();
-        System.out.println("GC complete.");
     }
 }
diff --git a/test/088-monitor-verification/smali/NotStructuredOverUnlock.smali b/test/088-monitor-verification/smali/NotStructuredOverUnlock.smali
index aa0c2d5..0dc492f 100644
--- a/test/088-monitor-verification/smali/NotStructuredOverUnlock.smali
+++ b/test/088-monitor-verification/smali/NotStructuredOverUnlock.smali
@@ -5,7 +5,7 @@
 .method public static run(Ljava/lang/Object;)V
    .registers 3
 
-   invoke-static {}, LMain;->assertCallerIsInterpreted()V
+   invoke-static {}, LMain;->assertIsInterpreted()V
 
    # Lock twice, but unlock thrice.
 
diff --git a/test/088-monitor-verification/smali/NotStructuredUnderUnlock.smali b/test/088-monitor-verification/smali/NotStructuredUnderUnlock.smali
index 2c31fda..df6e168 100644
--- a/test/088-monitor-verification/smali/NotStructuredUnderUnlock.smali
+++ b/test/088-monitor-verification/smali/NotStructuredUnderUnlock.smali
@@ -5,7 +5,7 @@
 .method public static run(Ljava/lang/Object;)V
    .registers 3
 
-   invoke-static {}, LMain;->assertCallerIsInterpreted()V
+   invoke-static {}, LMain;->assertIsInterpreted()V
 
    # Lock thrice, but only unlock twice.
 
diff --git a/test/088-monitor-verification/smali/OK.smali b/test/088-monitor-verification/smali/OK.smali
index 596798d..a43ecb0 100644
--- a/test/088-monitor-verification/smali/OK.smali
+++ b/test/088-monitor-verification/smali/OK.smali
@@ -20,7 +20,7 @@
 .method public static runNoMonitors(Ljava/lang/Object;Ljava/lang/Object;)V
    .registers 3
 
-   invoke-static {}, LMain;->assertCallerIsManaged()V
+   invoke-static {}, LMain;->assertIsManaged()V
 
    return-void
 
@@ -29,7 +29,7 @@
 .method public static runStraightLine(Ljava/lang/Object;Ljava/lang/Object;)V
    .registers 3
 
-   invoke-static {}, LMain;->assertCallerIsManaged()V
+   invoke-static {}, LMain;->assertIsManaged()V
 
    monitor-enter v1      # 1
    monitor-enter v2      # 2
@@ -44,7 +44,7 @@
 .method public static runBalancedJoin(Ljava/lang/Object;Ljava/lang/Object;)V
    .registers 3
 
-   invoke-static {}, LMain;->assertCallerIsManaged()V
+   invoke-static {}, LMain;->assertIsManaged()V
 
    monitor-enter v1      # 1
 
diff --git a/test/088-monitor-verification/smali/TooDeep.smali b/test/088-monitor-verification/smali/TooDeep.smali
index 1a8f2f0..a1e3281 100644
--- a/test/088-monitor-verification/smali/TooDeep.smali
+++ b/test/088-monitor-verification/smali/TooDeep.smali
@@ -7,7 +7,7 @@
 
    # Lock depth is 33, which is more than the verifier supports. This should have been punted to
    # the interpreter.
-   invoke-static {}, LMain;->assertCallerIsInterpreted()V
+   invoke-static {}, LMain;->assertIsInterpreted()V
 
    monitor-enter v2        #  1
    monitor-enter v2        #  2
diff --git a/test/088-monitor-verification/smali/UnbalancedJoin.smali b/test/088-monitor-verification/smali/UnbalancedJoin.smali
index da8f773..993f32c 100644
--- a/test/088-monitor-verification/smali/UnbalancedJoin.smali
+++ b/test/088-monitor-verification/smali/UnbalancedJoin.smali
@@ -5,7 +5,7 @@
 .method public static run(Ljava/lang/Object;Ljava/lang/Object;)V
    .registers 3
 
-   invoke-static {}, LMain;->assertCallerIsInterpreted()V
+   invoke-static {}, LMain;->assertIsInterpreted()V
 
    if-eqz v2, :Lnull
 
diff --git a/test/088-monitor-verification/smali/UnbalancedStraight.smali b/test/088-monitor-verification/smali/UnbalancedStraight.smali
index 68edb6c..cbb8bcc 100644
--- a/test/088-monitor-verification/smali/UnbalancedStraight.smali
+++ b/test/088-monitor-verification/smali/UnbalancedStraight.smali
@@ -5,7 +5,7 @@
 .method public static run(Ljava/lang/Object;Ljava/lang/Object;)V
    .registers 3
 
-   invoke-static {}, LMain;->assertCallerIsInterpreted()V
+   invoke-static {}, LMain;->assertIsInterpreted()V
 
    monitor-enter v1      # 1
    monitor-enter v2      # 2
diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java
index fc5755b..2188055 100644
--- a/test/088-monitor-verification/src/Main.java
+++ b/test/088-monitor-verification/src/Main.java
@@ -220,7 +220,7 @@
 
     // Smali testing code.
     private static void runSmaliTests() {
-        if (!hasOatFile() || runtimeIsSoftFail() || isCallerInterpreted()) {
+        if (!hasOatFile() || runtimeIsSoftFail() || isInterpreted()) {
             // Skip test, this seems to be a non-compiled code test configuration.
             return;
         }
@@ -277,9 +277,9 @@
     }
 
     // Helpers for the smali code.
-    public static native void assertCallerIsInterpreted();
-    public static native void assertCallerIsManaged();
+    public static native void assertIsInterpreted();
+    public static native void assertIsManaged();
     public static native boolean hasOatFile();
     public static native boolean runtimeIsSoftFail();
-    public static native boolean isCallerInterpreted();
+    public static native boolean isInterpreted();
 }
diff --git a/test/449-checker-bce/expected.txt b/test/449-checker-bce/expected.txt
index e114c50..4665d7a 100644
--- a/test/449-checker-bce/expected.txt
+++ b/test/449-checker-bce/expected.txt
@@ -1 +1,2 @@
+JNI_OnLoad called
 java.lang.ArrayIndexOutOfBoundsException: length=5; index=82
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index f06c250..22829cd 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -265,6 +265,7 @@
 
   // A helper into which the actual throwing function should be inlined.
   static void constantIndexingForward6(int[] array) {
+    assertIsManaged();
     constantIndexing6(array);
   }
 
@@ -618,13 +619,17 @@
 
   static int foo() {
     try {
+      assertIsManaged();
       // This will cause AIOOBE.
       constantIndexing2(new int[3]);
     } catch (ArrayIndexOutOfBoundsException e) {
+      assertIsManaged();  // This is to ensure that single-frame deoptimization works.
+                                // Will need to be updated if constantIndexing2 is inlined.
       try {
         // This will cause AIOOBE.
         constantIndexingForward6(new int[3]);
       } catch (ArrayIndexOutOfBoundsException e2) {
+        assertIsManaged();
         return 99;
       }
     }
@@ -634,13 +639,13 @@
 
   int sum;
 
-  /// CHECK-START: void Main.foo1(int[], int, int) BCE (before)
+  /// CHECK-START: void Main.foo1(int[], int, int, boolean) BCE (before)
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
 
-  /// CHECK-START: void Main.foo1(int[], int, int) BCE (after)
+  /// CHECK-START: void Main.foo1(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
@@ -657,25 +662,30 @@
   /// CHECK: Phi
   /// CHECK: Goto
 
-  void foo1(int[] array, int start, int end) {
+  void foo1(int[] array, int start, int end, boolean expectInterpreter) {
     // Three HDeoptimize will be added. One for
     // start >= 0, one for end <= array.length,
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i < end; i++) {
+      if (expectInterpreter) {
+        assertIsInterpreted();
+      } else {
+        assertIsManaged();
+      }
       array[i] = 1;
       sum += array[i];
     }
   }
 
 
-  /// CHECK-START: void Main.foo2(int[], int, int) BCE (before)
+  /// CHECK-START: void Main.foo2(int[], int, int, boolean) BCE (before)
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
 
-  /// CHECK-START: void Main.foo2(int[], int, int) BCE (after)
+  /// CHECK-START: void Main.foo2(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
@@ -692,25 +702,30 @@
   /// CHECK: Phi
   /// CHECK: Goto
 
-  void foo2(int[] array, int start, int end) {
+  void foo2(int[] array, int start, int end, boolean expectInterpreter) {
     // Three HDeoptimize will be added. One for
     // start >= 0, one for end <= array.length,
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i <= end; i++) {
+      if (expectInterpreter) {
+        assertIsInterpreted();
+      } else {
+        assertIsManaged();
+      }
       array[i] = 1;
       sum += array[i];
     }
   }
 
 
-  /// CHECK-START: void Main.foo3(int[], int) BCE (before)
+  /// CHECK-START: void Main.foo3(int[], int, boolean) BCE (before)
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
 
-  /// CHECK-START: void Main.foo3(int[], int) BCE (after)
+  /// CHECK-START: void Main.foo3(int[], int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
@@ -726,24 +741,29 @@
   /// CHECK: Phi
   /// CHECK: Goto
 
-  void foo3(int[] array, int end) {
+  void foo3(int[] array, int end, boolean expectInterpreter) {
     // Two HDeoptimize will be added. One for end < array.length,
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
     for (int i = 3 ; i <= end; i++) {
+      if (expectInterpreter) {
+        assertIsInterpreted();
+      } else {
+        assertIsManaged();
+      }
       array[i] = 1;
       sum += array[i];
     }
   }
 
 
-  /// CHECK-START: void Main.foo4(int[], int) BCE (before)
+  /// CHECK-START: void Main.foo4(int[], int, boolean) BCE (before)
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
 
-  /// CHECK-START: void Main.foo4(int[], int) BCE (after)
+  /// CHECK-START: void Main.foo4(int[], int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
@@ -759,18 +779,23 @@
   /// CHECK: Phi
   /// CHECK: Goto
 
-  void foo4(int[] array, int end) {
+  void foo4(int[] array, int end, boolean expectInterpreter) {
     // Two HDeoptimize will be added. One for end <= array.length,
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
     for (int i = end ; i > 0; i--) {
+      if (expectInterpreter) {
+        assertIsInterpreted();
+      } else {
+        assertIsManaged();
+      }
       array[i - 1] = 1;
       sum += array[i - 1];
     }
   }
 
 
-  /// CHECK-START: void Main.foo5(int[], int) BCE (before)
+  /// CHECK-START: void Main.foo5(int[], int, boolean) BCE (before)
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK: BoundsCheck
@@ -780,7 +805,7 @@
   /// CHECK: BoundsCheck
   /// CHECK: ArrayGet
 
-  /// CHECK-START: void Main.foo5(int[], int) BCE (after)
+  /// CHECK-START: void Main.foo5(int[], int, boolean) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK: Phi
@@ -800,7 +825,7 @@
   /// CHECK-NOT: Phi
   /// CHECK: Goto
 
-  void foo5(int[] array, int end) {
+  void foo5(int[] array, int end, boolean expectInterpreter) {
     // Bounds check in this loop can be eliminated without deoptimization.
     for (int i = array.length - 1 ; i >= 0; i--) {
       array[i] = 1;
@@ -808,6 +833,11 @@
     // One HDeoptimize will be added.
     // It's for (end - 2 <= array.length - 2).
     for (int i = end - 2 ; i > 0; i--) {
+      if (expectInterpreter) {
+        assertIsInterpreted();
+      } else {
+        assertIsManaged();
+      }
       sum += array[i - 1];
       sum += array[i];
       sum += array[i + 1];
@@ -815,7 +845,7 @@
   }
 
 
-  /// CHECK-START: void Main.foo6(int[], int, int) BCE (before)
+  /// CHECK-START: void Main.foo6(int[], int, int, boolean) BCE (before)
   /// CHECK: BoundsCheck
   /// CHECK: ArrayGet
   /// CHECK: BoundsCheck
@@ -829,7 +859,7 @@
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
 
-  /// CHECK-START: void Main.foo6(int[], int, int) BCE (after)
+  /// CHECK-START: void Main.foo6(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
@@ -855,12 +885,17 @@
   /// CHECK: Goto
   /// CHECK-NOT: Deoptimize
 
-  void foo6(int[] array, int start, int end) {
+  void foo6(int[] array, int start, int end, boolean expectInterpreter) {
     // Three HDeoptimize will be added. One for
     // start >= 2, one for end <= array.length - 3,
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = end; i >= start; i--) {
+      if (expectInterpreter) {
+        assertIsInterpreted();
+      } else {
+        assertIsManaged();
+      }
       array[i] = (array[i-2] + array[i-1] + array[i] + array[i+1] + array[i+2]) / 5;
     }
   }
@@ -948,12 +983,12 @@
   }
 
 
-  /// CHECK-START: void Main.foo9(int[]) BCE (before)
+  /// CHECK-START: void Main.foo9(int[], boolean) BCE (before)
   /// CHECK: NullCheck
   /// CHECK: BoundsCheck
   /// CHECK: ArrayGet
 
-  /// CHECK-START: void Main.foo9(int[]) BCE (after)
+  /// CHECK-START: void Main.foo9(int[], boolean) BCE (after)
   //  The loop is guaranteed to be entered. No need to transform the
   //  loop for loop body entry test.
   /// CHECK: Deoptimize
@@ -964,10 +999,15 @@
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
 
-  void foo9(int[] array) {
+  void foo9(int[] array, boolean expectInterpreter) {
     // Two HDeoptimize will be added. One for
     // 10 <= array.length, and one for null check on array.
     for (int i = 0 ; i < 10; i++) {
+      if (expectInterpreter) {
+        assertIsInterpreted();
+      } else {
+        assertIsManaged();
+      }
       sum += array[i];
     }
   }
@@ -999,7 +1039,7 @@
   static void testUnknownBounds() {
     boolean caught = false;
     Main main = new Main();
-    main.foo1(new int[10], 0, 10);
+    main.foo1(new int[10], 0, 10, false);
     if (main.sum != 10) {
       System.out.println("foo1 failed!");
     }
@@ -1007,7 +1047,7 @@
     caught = false;
     main = new Main();
     try {
-      main.foo1(new int[10], 0, 11);
+      main.foo1(new int[10], 0, 11, true);
     } catch (ArrayIndexOutOfBoundsException e) {
       caught = true;
     }
@@ -1016,7 +1056,7 @@
     }
 
     main = new Main();
-    main.foo2(new int[10], 0, 9);
+    main.foo2(new int[10], 0, 9, false);
     if (main.sum != 10) {
       System.out.println("foo2 failed!");
     }
@@ -1024,7 +1064,7 @@
     caught = false;
     main = new Main();
     try {
-      main.foo2(new int[10], 0, 10);
+      main.foo2(new int[10], 0, 10, true);
     } catch (ArrayIndexOutOfBoundsException e) {
       caught = true;
     }
@@ -1033,7 +1073,7 @@
     }
 
     main = new Main();
-    main.foo3(new int[10], 9);
+    main.foo3(new int[10], 9, false);
     if (main.sum != 7) {
       System.out.println("foo3 failed!");
     }
@@ -1041,7 +1081,7 @@
     caught = false;
     main = new Main();
     try {
-      main.foo3(new int[10], 10);
+      main.foo3(new int[10], 10, true);
     } catch (ArrayIndexOutOfBoundsException e) {
       caught = true;
     }
@@ -1050,7 +1090,7 @@
     }
 
     main = new Main();
-    main.foo4(new int[10], 10);
+    main.foo4(new int[10], 10, false);
     if (main.sum != 10) {
       System.out.println("foo4 failed!");
     }
@@ -1058,7 +1098,7 @@
     caught = false;
     main = new Main();
     try {
-      main.foo4(new int[10], 11);
+      main.foo4(new int[10], 11, true);
     } catch (ArrayIndexOutOfBoundsException e) {
       caught = true;
     }
@@ -1067,7 +1107,7 @@
     }
 
     main = new Main();
-    main.foo5(new int[10], 10);
+    main.foo5(new int[10], 10, false);
     if (main.sum != 24) {
       System.out.println("foo5 failed!");
     }
@@ -1075,7 +1115,7 @@
     caught = false;
     main = new Main();
     try {
-      main.foo5(new int[10], 11);
+      main.foo5(new int[10], 11, true);
     } catch (ArrayIndexOutOfBoundsException e) {
       caught = true;
     }
@@ -1084,11 +1124,11 @@
     }
 
     main = new Main();
-    main.foo6(new int[10], 2, 7);
+    main.foo6(new int[10], 2, 7, false);
 
     main = new Main();
     int[] array9 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-    main.foo9(array9);
+    main.foo9(array9, false);
     if (main.sum != 45) {
       System.out.println("foo9 failed!");
     }
@@ -1104,7 +1144,7 @@
     caught = false;
     main = new Main();
     try {
-      main.foo6(new int[10], 2, 8);
+      main.foo6(new int[10], 2, 8, true);
     } catch (ArrayIndexOutOfBoundsException e) {
       caught = true;
     }
@@ -1115,7 +1155,7 @@
     caught = false;
     main = new Main();
     try {
-      main.foo6(new int[10], 1, 7);
+      main.foo6(new int[10], 1, 7, true);
     } catch (ArrayIndexOutOfBoundsException e) {
       caught = true;
     }
@@ -1152,6 +1192,15 @@
   /// CHECK: ParallelMove
 
   public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+
+    if (!compiledWithOptimizing() ||
+        !hasOatFile() ||
+        runtimeIsSoftFail() ||
+        isInterpreted()) {
+      disableStackFrameAsserts();
+    }
+
     sieve(20);
 
     int[] array = {5, 2, 3, 7, 0, 1, 6, 4};
@@ -1190,4 +1239,11 @@
     new Main().testExceptionMessage();
   }
 
+  public static native boolean compiledWithOptimizing();
+  public static native void disableStackFrameAsserts();
+  public static native void assertIsManaged();
+  public static native void assertIsInterpreted();
+  public static native boolean hasOatFile();
+  public static native boolean runtimeIsSoftFail();
+  public static native boolean isInterpreted();
 }
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index a103eac..db16b97 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -369,6 +369,7 @@
 # This test dynamically enables tracing to force a deoptimization. This makes the test meaningless
 # when already tracing, and writes an error message that we do not want to check for.
 TEST_ART_BROKEN_TRACING_RUN_TESTS := \
+  087-gc-after-link \
   137-cfi \
   141-class-unload \
   802-deoptimization
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index 042b03b..082c9b3 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -66,4 +66,54 @@
   return Runtime::Current()->IsImageDex2OatEnabled();
 }
 
+// public static native boolean compiledWithOptimizing();
+// Did we use the optimizing compiler to compile this?
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_compiledWithOptimizing(JNIEnv* env, jclass cls) {
+  ScopedObjectAccess soa(env);
+
+  mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
+  const DexFile& dex_file = klass->GetDexFile();
+  const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile();
+  if (oat_dex_file == nullptr) {
+    // Could be JIT, which also uses optimizing, but conservatively say no.
+    return JNI_FALSE;
+  }
+  const OatFile* oat_file = oat_dex_file->GetOatFile();
+  CHECK(oat_file != nullptr);
+
+  const char* cmd_line = oat_file->GetOatHeader().GetStoreValueByKey(OatHeader::kDex2OatCmdLineKey);
+  CHECK(cmd_line != nullptr);  // Huh? This should not happen.
+
+  // Check the backend.
+  constexpr const char* kCompilerBackend = "--compiler-backend=";
+  const char* backend = strstr(cmd_line, kCompilerBackend);
+  if (backend != nullptr) {
+    // If it's set, make sure it's optimizing.
+    backend += strlen(kCompilerBackend);
+    if (strncmp(backend, "Optimizing", strlen("Optimizing")) != 0) {
+      return JNI_FALSE;
+    }
+  }
+
+  // Check the filter.
+  constexpr const char* kCompilerFilter = "--compiler-filter=";
+  const char* filter = strstr(cmd_line, kCompilerFilter);
+  if (filter != nullptr) {
+    // If it's set, make sure it's not interpret-only|verify-none|verify-at-runtime.
+    // Note: The space filter might have an impact on the test, but ignore that for now.
+    filter += strlen(kCompilerFilter);
+    constexpr const char* kInterpretOnly = "interpret-only";
+    constexpr const char* kVerifyNone = "verify-none";
+    constexpr const char* kVerifyAtRuntime = "verify-at-runtime";
+    if (strncmp(filter, kInterpretOnly, strlen(kInterpretOnly)) == 0 ||
+        strncmp(filter, kVerifyNone, strlen(kVerifyNone)) == 0 ||
+        strncmp(filter, kVerifyAtRuntime, strlen(kVerifyAtRuntime)) == 0) {
+      return JNI_FALSE;
+    }
+  }
+
+  return JNI_TRUE;
+}
+
 }  // namespace art
diff --git a/test/common/stack_inspect.cc b/test/common/stack_inspect.cc
index d22cf52..922eae6 100644
--- a/test/common/stack_inspect.cc
+++ b/test/common/stack_inspect.cc
@@ -27,9 +27,20 @@
 
 namespace art {
 
-// public static native boolean isCallerInterpreted();
+static bool asserts_enabled = true;
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isCallerInterpreted(JNIEnv* env, jclass) {
+// public static native void disableStackFrameAsserts();
+// Note: to globally disable asserts in unsupported configurations.
+
+extern "C" JNIEXPORT void JNICALL Java_Main_disableStackFrameAsserts(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                                     jclass cls ATTRIBUTE_UNUSED) {
+  asserts_enabled = false;
+}
+
+
+// public static native boolean isInterpreted();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInterpreted(JNIEnv* env, jclass) {
   ScopedObjectAccess soa(env);
   NthCallerVisitor caller(soa.Self(), 1, false);
   caller.WalkStack();
@@ -37,16 +48,18 @@
   return caller.GetCurrentShadowFrame() != nullptr ? JNI_TRUE : JNI_FALSE;
 }
 
-// public static native void assertCallerIsInterpreted();
+// public static native void assertIsInterpreted();
 
-extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsInterpreted(JNIEnv* env, jclass klass) {
-  CHECK(Java_Main_isCallerInterpreted(env, klass));
+extern "C" JNIEXPORT void JNICALL Java_Main_assertIsInterpreted(JNIEnv* env, jclass klass) {
+  if (asserts_enabled) {
+    CHECK(Java_Main_isInterpreted(env, klass));
+  }
 }
 
 
-// public static native boolean isCallerManaged();
+// public static native boolean isManaged();
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isCallerManaged(JNIEnv* env, jclass cls) {
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isManaged(JNIEnv* env, jclass cls) {
   ScopedObjectAccess soa(env);
 
   mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
@@ -65,10 +78,12 @@
   return caller.GetCurrentShadowFrame() != nullptr ? JNI_FALSE : JNI_TRUE;
 }
 
-// public static native void assertCallerIsManaged();
+// public static native void assertIsManaged();
 
-extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsManaged(JNIEnv* env, jclass cls) {
-  CHECK(Java_Main_isCallerManaged(env, cls));
+extern "C" JNIEXPORT void JNICALL Java_Main_assertIsManaged(JNIEnv* env, jclass cls) {
+  if (asserts_enabled) {
+    CHECK(Java_Main_isManaged(env, cls));
+  }
 }
 
 }  // namespace art