Merge "Undo dex2dex compilation before invoking LoadHook"
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index 1876efc..1ae79ac 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -66,6 +66,9 @@
 # Do you want to test the optimizing compiler with graph coloring register allocation?
 ART_TEST_OPTIMIZING_GRAPH_COLOR ?= $(ART_TEST_FULL)
 
+# Do you want to run the run-tests with profiles?
+ART_TEST_SPEED_PROFILE ?= $(ART_TEST_FULL)
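+# (Usage sketch, by analogy with the other ART_TEST_* knobs above; the
+# exact invocation is an assumption, not part of this change:
+# `make test-art-host-run-test ART_TEST_SPEED_PROFILE=true`.)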
+
 # Do we want to test PIC-compiled tests ("apps")?
 ART_TEST_PIC_TEST ?= $(ART_TEST_FULL)
 
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 71c4e95..cd19fa4 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -401,19 +401,19 @@
 };
 
 template <>
-struct CmdlineType<std::vector<ti::Agent>> : CmdlineTypeParser<std::vector<ti::Agent>> {
+struct CmdlineType<std::list<ti::Agent>> : CmdlineTypeParser<std::list<ti::Agent>> {
   Result Parse(const std::string& args) {
-    assert(false && "Use AppendValues() for an Agent vector type");
-    return Result::Failure("Unconditional failure: Agent vector must be appended: " + args);
+    assert(false && "Use AppendValues() for an Agent list type");
+    return Result::Failure("Unconditional failure: Agent list must be appended: " + args);
   }
 
   Result ParseAndAppend(const std::string& args,
-                        std::vector<ti::Agent>& existing_value) {
+                        std::list<ti::Agent>& existing_value) {
     existing_value.emplace_back(args);
     return Result::SuccessNoValue();
   }
 
-  static const char* Name() { return "std::vector<ti::Agent>"; }
+  static const char* Name() { return "std::list<ti::Agent>"; }
 };
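+// (Informative note on the change above: unlike std::vector, std::list
+// never copies or moves its elements on append, so Agent objects keep
+// stable addresses; this is presumably the motivation for the switch.)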
 
 template <>
diff --git a/compiler/Android.bp b/compiler/Android.bp
index d57f301..b444fff 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -391,6 +391,7 @@
         mips64: {
             srcs: [
                 "linker/mips64/relative_patcher_mips64_test.cc",
+                "utils/mips64/managed_register_mips64_test.cc",
             ],
         },
         x86: {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index b1a92c2..d463830 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -3281,7 +3281,7 @@
 void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
   DCHECK(instruction->IsDiv() || instruction->IsRem());
   Primitive::Type type = instruction->GetResultType();
-  DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong);
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
   LocationSummary* locations = instruction->GetLocations();
   Register out = OutputRegister(instruction);
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 5f02a52..287891f 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -461,6 +461,536 @@
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS);
 };
 
+class ArraySetSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  explicit ArraySetSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    mips_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS);
+};
+
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathMIPS below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location, it is assumed to already hold
+// the entrypoint. The entrypoint is passed in explicitly only for the
+// GcRoot read barrier.
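+//
+// Informal sketch of the contract (assuming a concurrent copying
+// collector with a from-space object F and its to-space copy T):
+//
+//   before the slow path:  ref == F, obj.field == F
+//   after the slow path:   ref == T, obj.field == F (stale but benign)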
+class ReadBarrierMarkSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  ReadBarrierMarkSlowPathMIPS(HInstruction* instruction,
+                              Location ref,
+                              Location entrypoint = Location::NoLocation())
+      : SlowPathCodeMIPS(instruction), ref_(ref), entrypoint_(entrypoint) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register ref_reg = ref_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsArraySet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    DCHECK((V0 <= ref_reg && ref_reg <= T7) ||
+           (S2 <= ref_reg && ref_reg <= S7) ||
+           (ref_reg == FP)) << ref_reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in A0 and V0 respectively):
+    //
+    //   A0 <- ref
+    //   V0 <- ReadBarrierMark(A0)
+    //   ref <- V0
+    //
+    // we just use rX (the register containing `ref`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    if (entrypoint_.IsValid()) {
+      mips_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+      DCHECK_EQ(entrypoint_.AsRegister<Register>(), T9);
+      __ Jalr(entrypoint_.AsRegister<Register>());
+      __ NopIfNoReordering();
+    } else {
+      int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
+      // This runtime call does not require a stack map.
+      mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+                                                        instruction_,
+                                                        this,
+                                                        /* direct */ false);
+    }
+    __ B(GetExitLabel());
+  }
+
+ private:
+  // The location (register) of the marked object reference.
+  const Location ref_;
+
+  // The location of the entrypoint if already loaded.
+  const Location entrypoint_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathMIPS);
+};
+
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathMIPS above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
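+//
+// Sketch (same assumptions as for ReadBarrierMarkSlowPathMIPS above):
+//
+//   before the slow path:  ref == F, obj.field == F
+//   after the slow path:   ref == T, obj.field == T
+//                          (unless another thread concurrently stored a
+//                           different reference, in which case the CAS
+//                           below leaves obj.field untouched)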
+class ReadBarrierMarkAndUpdateFieldSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  ReadBarrierMarkAndUpdateFieldSlowPathMIPS(HInstruction* instruction,
+                                            Location ref,
+                                            Register obj,
+                                            Location field_offset,
+                                            Register temp1)
+      : SlowPathCodeMIPS(instruction),
+        ref_(ref),
+        obj_(obj),
+        field_offset_(field_offset),
+        temp1_(temp1) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS";
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register ref_reg = ref_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+    // This slow path is only used by the UnsafeCASObject intrinsic.
+    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking and field updating slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+    DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+
+    __ Bind(GetEntryLabel());
+
+    // Save the old reference.
+    // Note that we cannot use AT or TMP to save the old reference, as those
+    // are used by the code that follows, but we need the old reference after
+    // the call to the ReadBarrierMarkRegX entry point.
+    DCHECK_NE(temp1_, AT);
+    DCHECK_NE(temp1_, TMP);
+    __ Move(temp1_, ref_reg);
+
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    DCHECK((V0 <= ref_reg && ref_reg <= T7) ||
+           (S2 <= ref_reg && ref_reg <= S7) ||
+           (ref_reg == FP)) << ref_reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in A0 and V0 respectively):
+    //
+    //   A0 <- ref
+    //   V0 <- ReadBarrierMark(A0)
+    //   ref <- V0
+    //
+    // we just use rX (the register containing `ref`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
+    // This runtime call does not require a stack map.
+    mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+                                                      instruction_,
+                                                      this,
+                                                      /* direct */ false);
+
+    // If the new reference is different from the old reference,
+    // update the field in the holder (`*(obj_ + field_offset_)`).
+    //
+    // Note that this field could also hold a different object, if
+    // another thread had concurrently changed it. In that case, the
+    // compare-and-set (CAS) loop below would abort, leaving the
+    // field as-is.
+    MipsLabel done;
+    __ Beq(temp1_, ref_reg, &done);
+
+    // Update the holder's field atomically.  This may fail if the
+    // mutator updates the field before us, but that is OK.  This is
+    // achieved using a strong compare-and-set (CAS) operation with
+    // relaxed memory synchronization ordering, where the expected
+    // value is the old reference and the desired value is the new
+    // reference.
+
+    // Convenience aliases.
+    Register base = obj_;
+    // The UnsafeCASObject intrinsic uses a register pair as field
+    // offset ("long offset"), of which only the low part contains
+    // data.
+    Register offset = field_offset_.AsRegisterPairLow<Register>();
+    Register expected = temp1_;
+    Register value = ref_reg;
+    Register tmp_ptr = TMP;      // Pointer to actual memory.
+    Register tmp = AT;           // Value in memory.
+
+    __ Addu(tmp_ptr, base, offset);
+
+    if (kPoisonHeapReferences) {
+      __ PoisonHeapReference(expected);
+      // Do not poison `value` if it is the same register as
+      // `expected`, which has just been poisoned.
+      if (value != expected) {
+        __ PoisonHeapReference(value);
+      }
+    }
+
+    // do {
+    //   tmp = [tmp_ptr] - expected;
+    // } while (tmp == 0 && failure([tmp_ptr] <- value));
+
+    bool is_r6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+    MipsLabel loop_head, exit_loop;
+    __ Bind(&loop_head);
+    if (is_r6) {
+      __ LlR6(tmp, tmp_ptr);
+    } else {
+      __ LlR2(tmp, tmp_ptr);
+    }
+    __ Bne(tmp, expected, &exit_loop);
+    __ Move(tmp, value);
+    if (is_r6) {
+      __ ScR6(tmp, tmp_ptr);
+    } else {
+      __ ScR2(tmp, tmp_ptr);
+    }
+    __ Beqz(tmp, &loop_head);
+    __ Bind(&exit_loop);
+
+    if (kPoisonHeapReferences) {
+      __ UnpoisonHeapReference(expected);
+      // Do not unpoison `value` if it is the same register as
+      // `expected`, which has just been unpoisoned.
+      if (value != expected) {
+        __ UnpoisonHeapReference(value);
+      }
+    }
+
+    __ Bind(&done);
+    __ B(GetExitLabel());
+  }
+
+ private:
+  // The location (register) of the marked object reference.
+  const Location ref_;
+  // The register containing the object holding the marked object reference field.
+  const Register obj_;
+  // The location of the offset of the marked reference field within `obj_`.
+  Location field_offset_;
+
+  // A temporary register used to hold the old (pre-marking) reference,
+  // which serves as the expected value of the CAS.
+  const Register temp1_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathMIPS);
+};
+
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  ReadBarrierForHeapReferenceSlowPathMIPS(HInstruction* instruction,
+                                          Location out,
+                                          Location ref,
+                                          Location obj,
+                                          uint32_t offset,
+                                          Location index)
+      : SlowPathCodeMIPS(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial object
+    // has been overwritten by (or after) the heap object reference load
+    // to be instrumented, e.g.:
+    //
+    //   __ LoadFromOffset(kLoadWord, out, out, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like other "inputs" of this slow path),
+    // we introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        Register index_reg = index_.AsRegister<Register>();
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to art::mips::MipsAssembler::Sll and
+          // art::mips::MipsAssembler::Addiu32 below), but it has
+          // not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          Register free_reg = FindAvailableCallerSaveRegister(codegen);
+          __ Move(free_reg, index_reg);
+          index_reg = free_reg;
+          index = Location::RegisterLocation(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the scale
+        // factor (2) cannot overflow in practice, as the runtime is
+        // unable to allocate object arrays with a size larger than
+        // 2^26 - 1 (that is, 2^28 - 4 bytes).
+        __ Sll(index_reg, index_reg, TIMES_4);
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ Addiu32(index_reg, index_reg, offset_);
+      } else {
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegisterPair());
+        // UnsafeGet's offset location is a register pair; only the low
+        // part contains the actual offset.
+        index = index_.ToLow();
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      __ LoadConst32(calling_convention.GetRegisterAt(2), offset_);
+    }
+    mips_codegen->InvokeRuntime(kQuickReadBarrierSlow,
+                                instruction_,
+                                instruction_->GetDexPc(),
+                                this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    mips_codegen->Move32(out_, calling_convention.GetReturnLocation(Primitive::kPrimNot));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathMIPS"; }
+
+ private:
+  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<size_t>(ref_.AsRegister<Register>());
+    size_t obj = static_cast<size_t>(obj_.AsRegister<Register>());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref &&
+          i != obj &&
+          !codegen->IsCoreCalleeSaveRegister(i) &&
+          !codegen->IsBlockedCoreRegister(i)) {
+        return static_cast<Register>(i);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on MIPS
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free caller-save register";
+    UNREACHABLE();
+  }
+
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathMIPS);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  ReadBarrierForRootSlowPathMIPS(HInstruction* instruction, Location out, Location root)
+      : SlowPathCodeMIPS(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    mips_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+    mips_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
+                                instruction_,
+                                instruction_->GetDexPc(),
+                                this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    mips_codegen->Move32(out_, calling_convention.GetReturnLocation(Primitive::kPrimNot));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS"; }
+
+ private:
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathMIPS);
+};
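+// Informative summary of the slow paths above: the two "mark" slow
+// paths serve the Baker (in-place marking) read barrier, while
+// ReadBarrierForHeapReferenceSlowPathMIPS and ReadBarrierForRootSlowPathMIPS
+// implement the out-of-line barrier taken when Baker read barriers are
+// not in use.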
+
 CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
                                      const MipsInstructionSetFeatures& isa_features,
                                      const CompilerOptions& compiler_options,
@@ -1310,10 +1840,26 @@
                                       uint32_t dex_pc,
                                       SlowPathCode* slow_path) {
   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
+  GenerateInvokeRuntime(GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value(),
+                        IsDirectEntrypoint(entrypoint));
+  if (EntrypointRequiresStackMap(entrypoint)) {
+    RecordPcInfo(instruction, dex_pc, slow_path);
+  }
+}
+
+void CodeGeneratorMIPS::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                            HInstruction* instruction,
+                                                            SlowPathCode* slow_path,
+                                                            bool direct) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  GenerateInvokeRuntime(entry_point_offset, direct);
+}
+
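+// Shared tail of the two InvokeRuntime variants above: emits the actual
+// entrypoint load through T9 and the jalr, with `direct` selecting the
+// extra argument-space reservation used by entrypoints that directly
+// reference native implementations.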
+void CodeGeneratorMIPS::GenerateInvokeRuntime(int32_t entry_point_offset, bool direct) {
   bool reordering = __ SetReorder(false);
-  __ LoadFromOffset(kLoadWord, T9, TR, GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value());
+  __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
   __ Jalr(T9);
-  if (IsDirectEntrypoint(entrypoint)) {
+  if (direct) {
     // Reserve argument space on stack (for $a0-$a3) for
     // entrypoints that directly reference native implementations.
     // Called function may use this space to store $a0-$a3 regs.
@@ -1323,9 +1869,6 @@
     __ Nop();  // In delay slot.
   }
   __ SetReorder(reordering);
-  if (EntrypointRequiresStackMap(entrypoint)) {
-    RecordPcInfo(instruction, dex_pc, slow_path);
-  }
 }
 
 void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path,
@@ -1885,14 +2428,31 @@
 }
 
 void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) {
+  Primitive::Type type = instruction->GetType();
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (type == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier
+                                                       ? LocationSummary::kCallOnSlowPath
+                                                       : LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  if (Primitive::IsFloatingPointType(instruction->GetType())) {
+  if (Primitive::IsFloatingPointType(type)) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object array get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // array's location, as we need it to emit the read barrier.
+    locations->SetOut(Location::RequiresRegister(),
+                      object_array_get_with_read_barrier
+                          ? Location::kOutputOverlap
+                          : Location::kNoOutputOverlap);
+  }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
@@ -1905,7 +2465,9 @@
 
 void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
+  Location out_loc = locations->Out();
   Location index = locations->InAt(1);
   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
   auto null_checker = GetImplicitNullChecker(instruction, codegen_);
@@ -1915,7 +2477,7 @@
                                         instruction->IsStringCharAt();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -1928,7 +2490,7 @@
     }
 
     case Primitive::kPrimByte: {
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -1941,7 +2503,7 @@
     }
 
     case Primitive::kPrimShort: {
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
@@ -1955,7 +2517,7 @@
     }
 
     case Primitive::kPrimChar: {
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (maybe_compressed_char_at) {
         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
         __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker);
@@ -2008,10 +2570,9 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
+    case Primitive::kPrimInt: {
       DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -2024,8 +2585,53 @@
       break;
     }
 
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
+                                                        out_loc,
+                                                        obj,
+                                                        data_offset,
+                                                        index,
+                                                        temp,
+                                                        /* needs_null_check */ true);
+      } else {
+        Register out = out_loc.AsRegister<Register>();
+        if (index.IsConstant()) {
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
+          __ Addu(TMP, obj, TMP);
+          __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction,
+                                                 out_loc,
+                                                 out_loc,
+                                                 obj_loc,
+                                                 data_offset,
+                                                 index);
+        }
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
-      Register out = locations->Out().AsRegisterPairLow<Register>();
+      Register out = out_loc.AsRegisterPairLow<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -2039,7 +2645,7 @@
     }
 
     case Primitive::kPrimFloat: {
-      FRegister out = locations->Out().AsFpuRegister<FRegister>();
+      FRegister out = out_loc.AsFpuRegister<FRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -2053,7 +2659,7 @@
     }
 
     case Primitive::kPrimDouble: {
-      FRegister out = locations->Out().AsFpuRegister<FRegister>();
+      FRegister out = out_loc.AsFpuRegister<FRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -2070,11 +2676,6 @@
       LOG(FATAL) << "Unreachable type " << instruction->GetType();
       UNREACHABLE();
   }
-
-  if (type == Primitive::kPrimNot) {
-    Register out = locations->Out().AsRegister<Register>();
-    __ MaybeUnpoisonHeapReference(out);
-  }
 }
 
 void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) {
@@ -2116,23 +2717,28 @@
 }
 
 void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) {
-  bool needs_runtime_call = instruction->NeedsTypeCheck();
+  Primitive::Type value_type = instruction->GetComponentType();
+
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
-  if (needs_runtime_call) {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+      may_need_runtime_call_for_type_check ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
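+  // Note: the previous implementation routed every ArraySet needing a
+  // type check through the pAputObject runtime call on the main path;
+  // the store is now emitted inline and only a failing type check takes
+  // the new ArraySetSlowPathMIPS.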
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+    locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
   } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
-      locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
-    } else {
-      locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
-    }
+    locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
+  }
+  if (needs_write_barrier) {
+    // Temporary register for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
   }
 }
 
@@ -2142,7 +2748,7 @@
   Location index = locations->InAt(1);
   Location value_location = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool needs_runtime_call = locations->WillCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   auto null_checker = GetImplicitNullChecker(instruction, codegen_);
@@ -2186,9 +2792,27 @@
       break;
     }
 
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      if (index.IsConstant()) {
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+      } else {
+        __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
+        __ Addu(base_reg, obj, base_reg);
+      }
+      if (value_location.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        Register value = value_location.AsRegister<Register>();
+        __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+      }
+      break;
+    }
+
     case Primitive::kPrimNot: {
-      if (!needs_runtime_call) {
+      if (value_location.IsConstant()) {
+        // Just setting null.
         uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
         if (index.IsConstant()) {
           data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
@@ -2196,48 +2820,110 @@
           __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
           __ Addu(base_reg, obj, base_reg);
         }
-        if (value_location.IsConstant()) {
-          int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
-          __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
-          DCHECK(!needs_write_barrier);
-        } else {
-          Register value = value_location.AsRegister<Register>();
-          if (kPoisonHeapReferences && needs_write_barrier) {
-            // Note that in the case where `value` is a null reference,
-            // we do not enter this block, as a null reference does not
-            // need poisoning.
-            DCHECK_EQ(value_type, Primitive::kPrimNot);
-            // Use Sw() instead of StoreToOffset() in order to be able to
-            // hold the poisoned reference in AT and thus avoid allocating
-            // yet another temporary register.
-            if (index.IsConstant()) {
-              if (!IsInt<16>(static_cast<int32_t>(data_offset))) {
-                int16_t low = Low16Bits(data_offset);
-                uint32_t high = data_offset - low;
-                __ Addiu32(TMP, obj, high);
-                base_reg = TMP;
-                data_offset = low;
-              }
-            } else {
-              DCHECK(IsInt<16>(static_cast<int32_t>(data_offset)));
-            }
-            __ PoisonHeapReference(AT, value);
-            __ Sw(AT, base_reg, data_offset);
-            null_checker();
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        DCHECK_EQ(value, 0);
+        __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call_for_type_check);
+        break;
+      }
+
+      DCHECK(needs_write_barrier);
+      Register value = value_location.AsRegister<Register>();
+      Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+      Register temp2 = TMP;  // Doesn't need to survive slow path.
+      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+      MipsLabel done;
+      SlowPathCodeMIPS* slow_path = nullptr;
+
+      if (may_need_runtime_call_for_type_check) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathMIPS(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          MipsLabel non_zero;
+          __ Bnez(value, &non_zero);
+          uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+          if (index.IsConstant()) {
+            data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
           } else {
-            __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+            __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
+            __ Addu(base_reg, obj, base_reg);
           }
-          if (needs_write_barrier) {
-            DCHECK_EQ(value_type, Primitive::kPrimNot);
-            codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
-          }
+          __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+          __ B(&done);
+          __ Bind(&non_zero);
         }
+
+        // Note that when read barriers are enabled, the type checks
+        // are performed without read barriers.  This is fine, even in
+        // the case where a class object is in the from-space after
+        // the flip, as a comparison involving such a type would not
+        // produce a false positive; it may of course produce a false
+        // negative, in which case we would take the ArraySet slow
+        // path.
+
+        // /* HeapReference<Class> */ temp1 = obj->klass_
+        __ LoadFromOffset(kLoadWord, temp1, obj, class_offset, null_checker);
+        __ MaybeUnpoisonHeapReference(temp1);
+
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+        // /* HeapReference<Class> */ temp2 = value->klass_
+        __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+        // If heap poisoning is enabled, there is no need to unpoison
+        // `temp1` or `temp2`, as we are comparing two poisoned references.
+
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          MipsLabel do_put;
+          __ Beq(temp1, temp2, &do_put);
+          // If heap poisoning is enabled, the `temp1` reference has
+          // not been unpoisoned yet; unpoison it now.
+          __ MaybeUnpoisonHeapReference(temp1);
+
+          // /* HeapReference<Class> */ temp1 = temp1->super_class_
+          __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+          // If heap poisoning is enabled, no need to unpoison
+          // `temp1`, as we are comparing against null below.
+          __ Bnez(temp1, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ Bne(temp1, temp2, slow_path->GetEntryLabel());
+        }
+      }
+
+      Register source = value;
+      if (kPoisonHeapReferences) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        __ Move(temp1, value);
+        __ PoisonHeapReference(temp1);
+        source = temp1;
+      }
+
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      if (index.IsConstant()) {
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
       } else {
-        DCHECK_EQ(value_type, Primitive::kPrimNot);
-        // Note: if heap poisoning is enabled, pAputObject takes care
-        // of poisoning the reference.
-        codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
-        CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+        __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
+        __ Addu(base_reg, obj, base_reg);
+      }
+      __ StoreToOffset(kStoreWord, source, base_reg, data_offset);
+
+      if (!may_need_runtime_call_for_type_check) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+
+      codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+
+      if (done.IsLinked()) {
+        __ Bind(&done);
+      }
+
+      if (slow_path != nullptr) {
+        __ Bind(slow_path->GetExitLabel());
       }
       break;
     }
@@ -2327,6 +3013,23 @@
   __ Bgeu(index, length, slow_path->GetEntryLabel());
 }
 
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+  if (kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    return 1;
+  }
+  return 0;
+}
+
+// Extra temp is used for read barrier.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  return 1 + NumberOfInstanceOfTemps(type_check_kind);
+}
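+// For instance (informative only): with read barriers disabled these
+// return 0 and 1 temps; with Baker read barriers enabled every check
+// kind needs one extra temp, giving 1 and 2, which matches the
+// DCHECK_LE(num_temps, 2u) in VisitCheckCast below.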
+
 void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
@@ -2337,7 +3040,7 @@
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier)
           ? LocationSummary::kCallOnSlowPath
           : LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
       break;
@@ -2351,15 +3054,20 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());
+  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
-  Register temp = locations->GetTemp(0).AsRegister<Register>();
+  Location temp_loc = locations->GetTemp(0);
+  Register temp = temp_loc.AsRegister<Register>();
+  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+  DCHECK_LE(num_temps, 2u);
+  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -2396,8 +3104,12 @@
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
       __ Bne(temp, cls, slow_path->GetEntryLabel());
@@ -2406,15 +3118,22 @@
 
     case TypeCheckKind::kAbstractClassCheck: {
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       MipsLabel loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
       // If the class reference currently in `temp` is null, jump to the slow path to throw the
       // exception.
       __ Beqz(temp, slow_path->GetEntryLabel());
@@ -2425,15 +3144,22 @@
 
     case TypeCheckKind::kClassHierarchyCheck: {
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // Walk over the class hierarchy to find a match.
       MipsLabel loop;
       __ Bind(&loop);
       __ Beq(temp, cls, &done);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
       // If the class reference currently in `temp` is null, jump to the slow path to throw the
       // exception. Otherwise, jump to the beginning of the loop.
       __ Bnez(temp, &loop);
@@ -2443,14 +3169,21 @@
 
     case TypeCheckKind::kArrayObjectCheck: {
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // Do an exact check.
       __ Beq(temp, cls, &done);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      __ LoadFromOffset(kLoadWord, temp, temp, component_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
       // If the component type is null, jump to the slow path to throw the exception.
       __ Beqz(temp, slow_path->GetEntryLabel());
       // Otherwise, the object is indeed an array, further check that this component
@@ -2477,11 +3210,19 @@
       // Avoid read barriers to improve performance of the fast path. We cannot get false
       // positives by doing this.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // /* HeapReference<Class> */ temp = temp->iftable_
-      __ LoadFromOffset(kLoadWord, temp, temp, iftable_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        temp_loc,
+                                        iftable_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // Iftable is never null.
       __ Lw(TMP, temp, array_length_offset);
       // Loop through the iftable and check if any class matches.
@@ -5032,8 +5773,15 @@
   Primitive::Type field_type = field_info.GetFieldType();
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
   bool generate_volatile = field_info.IsVolatile() && is_wide;
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (field_type == Primitive::kPrimNot);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
+      instruction,
+      generate_volatile
+          ? LocationSummary::kCallOnMainOnly
+          : (object_field_get_with_read_barrier
+              ? LocationSummary::kCallOnSlowPath
+              : LocationSummary::kNoCall));
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (generate_volatile) {
@@ -5054,7 +5802,18 @@
     if (Primitive::IsFloatingPointType(instruction->GetType())) {
       locations->SetOut(Location::RequiresFpuRegister());
     } else {
-      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      // The output overlaps in the case of an object field get with
+      // read barriers enabled: we do not want the move to overwrite the
+      // object's location, as we need it to emit the read barrier.
+      locations->SetOut(Location::RequiresRegister(),
+                        object_field_get_with_read_barrier
+                            ? Location::kOutputOverlap
+                            : Location::kNoOutputOverlap);
+    }
+    if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+      // We need a temporary register for the read barrier marking slow
+      // path in CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier.
+      locations->AddTemp(Location::RequiresRegister());
     }
   }
 }
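
For intuition, the kOutputOverlap choice above can be modeled with a toy
snippet in which one variable stands in for one machine register. This is an
illustrative sketch only, not part of the patch; the names are ours:

    #include <cassert>
    #include <cstdint>

    // Toy stand-in for the emitted field load.
    uint32_t LoadField(uint32_t obj, uint32_t offset) { return obj + 0x100 + offset; }

    int main() {
      uint32_t reg = 0x1000;    // the register currently holding the object
      reg = LoadField(reg, 8);  // aliasing load, i.e. kNoOutputOverlap
      // The holder (0x1000) is now unrecoverable, yet a slow-path read
      // barrier emitted after the load would still need it; hence
      // kOutputOverlap for object field gets when read barriers are on.
      assert(reg == 0x1108);
      return 0;
    }
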
@@ -5064,7 +5823,9 @@
                                                   uint32_t dex_pc) {
   Primitive::Type type = field_info.GetFieldType();
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
+  Location dst_loc = locations->Out();
   LoadOperandType load_type = kLoadUnsignedByte;
   bool is_volatile = field_info.IsVolatile();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
@@ -5107,40 +5868,61 @@
     CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>();
     if (type == Primitive::kPrimDouble) {
       // FP results are returned in core registers. Need to move them.
-      Location out = locations->Out();
-      if (out.IsFpuRegister()) {
-        __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), out.AsFpuRegister<FRegister>());
+      if (dst_loc.IsFpuRegister()) {
+        __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), dst_loc.AsFpuRegister<FRegister>());
         __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
-                         out.AsFpuRegister<FRegister>());
+                         dst_loc.AsFpuRegister<FRegister>());
       } else {
-        DCHECK(out.IsDoubleStackSlot());
+        DCHECK(dst_loc.IsDoubleStackSlot());
         __ StoreToOffset(kStoreWord,
                          locations->GetTemp(1).AsRegister<Register>(),
                          SP,
-                         out.GetStackIndex());
+                         dst_loc.GetStackIndex());
         __ StoreToOffset(kStoreWord,
                          locations->GetTemp(2).AsRegister<Register>(),
                          SP,
-                         out.GetStackIndex() + 4);
+                         dst_loc.GetStackIndex() + 4);
       }
     }
   } else {
-    if (!Primitive::IsFloatingPointType(type)) {
+    if (type == Primitive::kPrimNot) {
+      // /* HeapReference<Object> */ dst = *(obj + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                        dst_loc,
+                                                        obj,
+                                                        offset,
+                                                        temp_loc,
+                                                        /* needs_null_check */ true);
+        if (is_volatile) {
+          GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ LoadFromOffset(kLoadWord, dst_loc.AsRegister<Register>(), obj, offset, null_checker);
+        if (is_volatile) {
+          GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset);
+      }
+    } else if (!Primitive::IsFloatingPointType(type)) {
       Register dst;
       if (type == Primitive::kPrimLong) {
-        DCHECK(locations->Out().IsRegisterPair());
-        dst = locations->Out().AsRegisterPairLow<Register>();
+        DCHECK(dst_loc.IsRegisterPair());
+        dst = dst_loc.AsRegisterPairLow<Register>();
       } else {
-        DCHECK(locations->Out().IsRegister());
-        dst = locations->Out().AsRegister<Register>();
+        DCHECK(dst_loc.IsRegister());
+        dst = dst_loc.AsRegister<Register>();
       }
       __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
-      if (type == Primitive::kPrimNot) {
-        __ MaybeUnpoisonHeapReference(dst);
-      }
     } else {
-      DCHECK(locations->Out().IsFpuRegister());
-      FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+      DCHECK(dst_loc.IsFpuRegister());
+      FRegister dst = dst_loc.AsFpuRegister<FRegister>();
       if (type == Primitive::kPrimFloat) {
         __ LoadSFromOffset(dst, obj, offset, null_checker);
       } else {
@@ -5149,7 +5931,9 @@
     }
   }
 
-  if (is_volatile) {
+  // Memory barriers, in the case of references, are handled in the
+  // reference load code above.
+  if (is_volatile && (type != Primitive::kPrimNot)) {
     GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   }
 }
@@ -5290,7 +6074,6 @@
     }
   }
 
-  // TODO: memory barriers?
   if (needs_write_barrier) {
     Register src = value_location.AsRegister<Register>();
     codegen_->MarkGCCard(obj, src, value_can_be_null);
@@ -5320,14 +6103,133 @@
                  instruction->GetValueCanBeNull());
 }
 
-void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(
-    HInstruction* instruction ATTRIBUTE_UNUSED,
-    Location root,
-    Register obj,
-    uint32_t offset) {
+void InstructionCodeGeneratorMIPS::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
+  Register out_reg = out.AsRegister<Register>();
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
+    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      out_reg,
+                                                      offset,
+                                                      maybe_temp,
+                                                      /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `maybe_temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ Move(maybe_temp.AsRegister<Register>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
+  Register out_reg = out.AsRegister<Register>();
+  Register obj_reg = obj.AsRegister<Register>();
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
+    if (kUseBakerReadBarrier) {
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      obj_reg,
+                                                      offset,
+                                                      maybe_temp,
+                                                      /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                           Location root,
+                                                           Register obj,
+                                                           uint32_t offset,
+                                                           ReadBarrierOption read_barrier_option) {
   Register root_reg = root.AsRegister<Register>();
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
+  if (read_barrier_option == kWithReadBarrier) {
+    DCHECK(kEmitCompilerReadBarrier);
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+      //
+      //   root = obj.field;
+      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      //   if (temp != null) {
+      //     root = temp(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path marking the GC root `root`.
+      Location temp = Location::RegisterLocation(T9);
+      SlowPathCodeMIPS* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS(
+              instruction,
+              root,
+              /*entrypoint*/ temp);
+      codegen_->AddSlowPath(slow_path);
+
+      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1);
+      // Loading the entrypoint does not require a load acquire since it is only changed when
+      // threads are suspended or running a checkpoint.
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+      // The entrypoint is null when the GC is not marking; this saves a load
+      // compared to checking GetIsGcMarking.
+      __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ Addiu32(root_reg, obj, offset);
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
   } else {
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
@@ -5337,6 +6239,226 @@
   }
 }
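
The Bnez test above relies on a small protocol: the runtime installs the
marking entrypoint only while the GC is marking. A minimal sketch of that
protocol, with hypothetical names (the real per-register entrypoints live in
the Thread object):

    #include <cstdint>

    using MarkRootFn = uintptr_t (*)(uintptr_t ref);

    uintptr_t LoadGcRoot(uintptr_t root, MarkRootFn entrypoint) {
      // A null entrypoint means the GC is not marking, so one pointer
      // load and branch replaces an explicit GetIsGcMarking() check.
      if (entrypoint != nullptr) {
        root = entrypoint(root);  // root = ReadBarrier::Mark(root)
      }
      return root;
    }
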
 
+void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                              Location ref,
+                                                              Register obj,
+                                                              uint32_t offset,
+                                                              Location temp,
+                                                              bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Location no_index = Location::NoLocation();
+  ScaleFactor no_scale_factor = TIMES_1;
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            offset,
+                                            no_index,
+                                            no_scale_factor,
+                                            temp,
+                                            needs_null_check);
+}
+
+void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                              Location ref,
+                                                              Register obj,
+                                                              uint32_t data_offset,
+                                                              Location index,
+                                                              Location temp,
+                                                              bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  ScaleFactor scale_factor = TIMES_4;
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            data_offset,
+                                            index,
+                                            scale_factor,
+                                            temp,
+                                            needs_null_check);
+}
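
As a sanity check of the address arithmetic this sets up, the effective
element offset is data_offset + (index << 2). The data offset value in the
sketch below is assumed for illustration:

    #include <cstdint>

    // Heap references are 32-bit (see the static_assert above), so array
    // elements are scaled by TIMES_4, i.e. a left shift of 2.
    constexpr uint32_t kScaleTimes4 = 2;

    constexpr uint32_t RefElementOffset(uint32_t data_offset, uint32_t index) {
      return data_offset + (index << kScaleTimes4);
    }

    // With an assumed 12-byte object-array data offset, element 3 sits
    // 24 bytes into the array object.
    static_assert(RefElementOffset(12, 3) == 24, "12 + 3 * sizeof(HeapReference)");
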
+
+void CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                  Location ref,
+                                                                  Register obj,
+                                                                  uint32_t offset,
+                                                                  Location index,
+                                                                  ScaleFactor scale_factor,
+                                                                  Location temp,
+                                                                  bool needs_null_check,
+                                                                  bool always_update_field) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
+
+  Register ref_reg = ref.AsRegister<Register>();
+  Register temp_reg = temp.AsRegister<Register>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+
+  __ Sync(0);  // Barrier to prevent load-load reordering.
+
+  // The actual reference load.
+  if (index.IsValid()) {
+    // Load types involving an "index": ArrayGet,
+    // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+    // intrinsics.
+    // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+    if (index.IsConstant()) {
+      size_t computed_offset =
+          (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
+      __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
+    } else {
+      // Handle the special case of the
+      // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+      // intrinsics, which use a register pair as index ("long
+      // offset"), of which only the low part contains data.
+      Register index_reg = index.IsRegisterPair()
+          ? index.AsRegisterPairLow<Register>()
+          : index.AsRegister<Register>();
+      __ Sll(TMP, index_reg, scale_factor);
+      __ Addu(TMP, obj, TMP);
+      __ LoadFromOffset(kLoadWord, ref_reg, TMP, offset);
+    }
+  } else {
+    // /* HeapReference<Object> */ ref = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, ref_reg, obj, offset);
+  }
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path marking the object `ref` when it is gray.
+  SlowPathCodeMIPS* slow_path;
+  if (always_update_field) {
+    // ReadBarrierMarkAndUpdateFieldSlowPathMIPS only supports address
+    // of the form `obj + field_offset`, where `obj` is a register and
+    // `field_offset` is a register pair (of which only the lower half
+    // is used). Thus `offset` is expected to be zero and `scale_factor`
+    // to be TIMES_1 (i.e. no scaling) in this code path.
+    DCHECK_EQ(offset, 0u);
+    DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+    slow_path = new (GetGraph()->GetArena())
+        ReadBarrierMarkAndUpdateFieldSlowPathMIPS(instruction,
+                                                  ref,
+                                                  obj,
+                                                  /* field_offset */ index,
+                                                  temp_reg);
+  } else {
+    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS(instruction, ref);
+  }
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::GrayState())
+  //   ref = ReadBarrier::Mark(ref);
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit into the sign bit (31) and
+  // performing a branch on less than zero.
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+  static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size");
+  __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift);
+  __ Bltz(temp_reg, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
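
The closing Sll/Bltz pair tests the 1-bit read barrier state without
materializing a boolean. A self-contained sketch of the same trick, with the
bit position assumed for illustration (ART takes it from
LockWord::kReadBarrierStateShift):

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kRbStateShift = 28;  // assumed for this sketch

    bool IsGray(uint32_t lock_word) {
      // Shift the rb_state bit into bit 31, then test the sign; this is
      // what the generated Sll + Bltz sequence does.
      int32_t shifted = static_cast<int32_t>(lock_word << (31 - kRbStateShift));
      return shifted < 0;
    }

    int main() {
      assert(IsGray(1u << kRbStateShift));            // gray (1): mark
      assert(!IsGray(0u));                            // white (0): fast path
      assert(IsGray((1u << kRbStateShift) | 0xffu));  // low bits are irrelevant
      return 0;
    }
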
+
+void CodeGeneratorMIPS::GenerateReadBarrierSlow(HInstruction* instruction,
+                                                Location out,
+                                                Location ref,
+                                                Location obj,
+                                                uint32_t offset,
+                                                Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the reference load.
+  //
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathMIPS(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  __ B(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorMIPS::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                     Location out,
+                                                     Location ref,
+                                                     Location obj,
+                                                     uint32_t offset,
+                                                     Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    __ UnpoisonHeapReference(out.AsRegister<Register>());
+  }
+}
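
Taken together, the helpers above implement a three-way dispatch driven by
two build-time flags. A compile-time sketch (the flags mirror
kEmitCompilerReadBarrier and kUseBakerReadBarrier; the enum is ours):

    enum class RefLoadPath { kBakerFastPath, kSlowPathCall, kPlainUnpoison };

    constexpr RefLoadPath Select(bool emit_read_barrier, bool use_baker) {
      return !emit_read_barrier
          ? RefLoadPath::kPlainUnpoison                // plain load (+ unpoison)
          : (use_baker ? RefLoadPath::kBakerFastPath   // inline gray-bit check
                       : RefLoadPath::kSlowPathCall);  // artReadBarrierSlow
    }

    static_assert(Select(false, false) == RefLoadPath::kPlainUnpoison, "");
    static_assert(Select(true, false) == RefLoadPath::kSlowPathCall, "");
    static_assert(Select(true, true) == RefLoadPath::kBakerFastPath, "");
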
+
+void CodeGeneratorMIPS::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                       Location out,
+                                                       Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCodeMIPS* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathMIPS(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  __ B(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -5345,7 +6467,8 @@
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
     case TypeCheckKind::kUnresolvedCheck:
@@ -5360,14 +6483,20 @@
   // The output overlaps the inputs.
   // Note that TypeCheckSlowPathMIPS uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
+  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+  DCHECK_LE(num_temps, 1u);
+  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5385,8 +6514,12 @@
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Classes must be equal for the instanceof to succeed.
       __ Xor(out, out, cls);
       __ Sltiu(out, out, 1);
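
The Xor/Sltiu pair computes the boolean result without a branch: the XOR is
zero exactly when the classes match, and the unsigned compare against 1 maps
zero to 1 and everything else to 0. A minimal sketch:

    #include <cassert>
    #include <cstdint>

    uint32_t ClassesEqual(uint32_t klass, uint32_t cls) {
      uint32_t x = klass ^ cls;  // Xor(out, out, cls)
      return x < 1u ? 1u : 0u;   // Sltiu(out, out, 1)
    }

    int main() {
      assert(ClassesEqual(0x1000u, 0x1000u) == 1u);
      assert(ClassesEqual(0x1000u, 0x2000u) == 0u);
      return 0;
    }
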
@@ -5395,15 +6528,22 @@
 
     case TypeCheckKind::kAbstractClassCheck: {
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       MipsLabel loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      __ LoadFromOffset(kLoadWord, out, out, super_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Beqz(out, &done);
       __ Bne(out, cls, &loop);
@@ -5413,15 +6553,22 @@
 
     case TypeCheckKind::kClassHierarchyCheck: {
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Walk over the class hierarchy to find a match.
       MipsLabel loop, success;
       __ Bind(&loop);
       __ Beq(out, cls, &success);
       // /* HeapReference<Class> */ out = out->super_class_
-      __ LoadFromOffset(kLoadWord, out, out, super_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ Bnez(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ B(&done);
@@ -5432,15 +6579,22 @@
 
     case TypeCheckKind::kArrayObjectCheck: {
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Do an exact check.
       MipsLabel success;
       __ Beq(out, cls, &success);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      __ LoadFromOffset(kLoadWord, out, out, component_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Beqz(out, &done);
       __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -5455,8 +6609,12 @@
     case TypeCheckKind::kArrayCheck: {
       // No read barrier since the slow path will retry upon failure.
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction,
                                                                      /* is_fatal */ false);
@@ -5627,9 +6785,6 @@
 
 HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
-  }
   // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
   // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
@@ -5665,9 +6820,6 @@
 
 HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
     HLoadClass::LoadKind desired_class_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
-  }
   // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
   bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
@@ -5916,12 +7068,13 @@
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Location::RegisterLocation(V0));
+        calling_convention.GetReturnLocation(Primitive::kPrimNot));
     return;
   }
   DCHECK(!cls->NeedsAccessCheck());
 
-  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
@@ -5976,6 +7129,9 @@
       break;
   }
 
+  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+      ? kWithoutReadBarrier
+      : kCompilerReadBarrierOption;
   bool generate_null_check = false;
   switch (load_kind) {
     case HLoadClass::LoadKind::kReferrersClass: {
@@ -5985,11 +7141,13 @@
       GenerateGcRootFieldLoad(cls,
                               out_loc,
                               base_or_current_method_reg,
-                              ArtMethod::DeclaringClassOffset().Int32Value());
+                              ArtMethod::DeclaringClassOffset().Int32Value(),
+                              read_barrier_option);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ LoadLiteral(out,
                      base_or_current_method_reg,
                      codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
@@ -5997,6 +7155,7 @@
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
       bool reordering = __ SetReorder(false);
@@ -6006,7 +7165,7 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
-      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       uint32_t address = dchecked_integral_cast<uint32_t>(
           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
       DCHECK_NE(address, 0u);
@@ -6020,7 +7179,7 @@
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
       bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
-      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
       __ SetReorder(reordering);
       generate_null_check = true;
       break;
@@ -6032,7 +7191,7 @@
       bool reordering = __ SetReorder(false);
       __ Bind(&info->high_label);
       __ Lui(out, /* placeholder */ 0x1234);
-      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
       __ SetReorder(reordering);
       break;
     }
@@ -6165,7 +7324,11 @@
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
-      GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(load,
+                              out_loc,
+                              out,
+                              /* placeholder */ 0x5678,
+                              kCompilerReadBarrierOption);
       __ SetReorder(reordering);
       SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
       codegen_->AddSlowPath(slow_path);
@@ -6181,7 +7344,11 @@
       bool reordering = __ SetReorder(false);
       __ Bind(&info->high_label);
       __ Lui(out, /* placeholder */ 0x1234);
-      GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(load,
+                              out_loc,
+                              out,
+                              /* placeholder */ 0x5678,
+                              kCompilerReadBarrierOption);
       __ SetReorder(reordering);
       return;
     }
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 98fee24..3875c4b 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -241,6 +241,38 @@
                       uint32_t dex_pc,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location maybe_temp,
+                                         ReadBarrierOption read_barrier_option);
+
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
@@ -249,7 +281,9 @@
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                Register obj,
-                               uint32_t offset);
+                               uint32_t offset,
+                               ReadBarrierOption read_barrier_option);
+
   void GenerateIntCompare(IfCondition cond, LocationSummary* locations);
   // When the function returns `false` it means that the condition holds if `dst` is non-zero
   // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero
@@ -353,6 +387,91 @@
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
 
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             Register obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             Register obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+  //
+  // Load the object reference located at the address
+  // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+  // `ref`, and mark it if needed.
+  //
+  // If `always_update_field` is true, the value of the reference is
+  // atomically updated in the holder (`obj`).
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 ScaleFactor scale_factor,
+                                                 Location temp,
+                                                 bool needs_null_check,
+                                                 bool always_update_field = false);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
   void MarkGCCard(Register object, Register value, bool value_can_be_null);
 
   // Register allocation.
@@ -400,6 +519,15 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path = nullptr) OVERRIDE;
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path,
+                                           bool direct);
+
+  void GenerateInvokeRuntime(int32_t entry_point_offset, bool direct);
+
   ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
 
   bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index c82533b..78b31e9 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -407,6 +407,528 @@
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64);
 };
 
+class ArraySetSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  explicit ArraySetSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    mips64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+    RestoreLiveRegisters(codegen, locations);
+    __ Bc(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS64"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS64);
+};
+
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location, it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is for the GcRoot read barrier.
+class ReadBarrierMarkSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  ReadBarrierMarkSlowPathMIPS64(HInstruction* instruction,
+                                Location ref,
+                                Location entrypoint = Location::NoLocation())
+      : SlowPathCodeMIPS64(instruction), ref_(ref), entrypoint_(entrypoint) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS64"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    GpuRegister ref_reg = ref_.AsRegister<GpuRegister>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsArraySet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    DCHECK((V0 <= ref_reg && ref_reg <= T2) ||
+           (S2 <= ref_reg && ref_reg <= S7) ||
+           (ref_reg == S8)) << ref_reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in A0 and V0 respectively):
+    //
+    //   A0 <- ref
+    //   V0 <- ReadBarrierMark(A0)
+    //   ref <- V0
+    //
+    // we just use rX (the register containing `ref`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    if (entrypoint_.IsValid()) {
+      mips64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+      DCHECK_EQ(entrypoint_.AsRegister<GpuRegister>(), T9);
+      __ Jalr(entrypoint_.AsRegister<GpuRegister>());
+      __ Nop();
+    } else {
+      int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
+      // This runtime call does not require a stack map.
+      mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+                                                          instruction_,
+                                                          this);
+    }
+    __ Bc(GetExitLabel());
+  }
+
+ private:
+  // The location (register) of the marked object reference.
+  const Location ref_;
+
+  // The location of the entrypoint if already loaded.
+  const Location entrypoint_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathMIPS64);
+};
+
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathMIPS64 above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(HInstruction* instruction,
+                                              Location ref,
+                                              GpuRegister obj,
+                                              Location field_offset,
+                                              GpuRegister temp1)
+      : SlowPathCodeMIPS64(instruction),
+        ref_(ref),
+        obj_(obj),
+        field_offset_(field_offset),
+        temp1_(temp1) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS64";
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    GpuRegister ref_reg = ref_.AsRegister<GpuRegister>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+    // This slow path is only used by the UnsafeCASObject intrinsic.
+    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking and field updating slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+    DCHECK(field_offset_.IsRegister()) << field_offset_;
+
+    __ Bind(GetEntryLabel());
+
+    // Save the old reference.
+    // Note that we cannot use AT or TMP to save the old reference, as those
+    // are used by the code that follows, but we need the old reference after
+    // the call to the ReadBarrierMarkRegX entry point.
+    DCHECK_NE(temp1_, AT);
+    DCHECK_NE(temp1_, TMP);
+    __ Move(temp1_, ref_reg);
+
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    DCHECK((V0 <= ref_reg && ref_reg <= T2) ||
+           (S2 <= ref_reg && ref_reg <= S7) ||
+           (ref_reg == S8)) << ref_reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in A0 and V0 respectively):
+    //
+    //   A0 <- ref
+    //   V0 <- ReadBarrierMark(A0)
+    //   ref <- V0
+    //
+    // we just use rX (the register containing `ref`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
+    // This runtime call does not require a stack map.
+    mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+                                                        instruction_,
+                                                        this);
+
+    // If the new reference is different from the old reference,
+    // update the field in the holder (`*(obj_ + field_offset_)`).
+    //
+    // Note that this field could also hold a different object, if
+    // another thread had concurrently changed it. In that case, the
+    // compare-and-set (CAS) loop below would abort, leaving the
+    // field as-is.
+    Mips64Label done;
+    __ Beqc(temp1_, ref_reg, &done);
+
+    // Update the holder's field atomically. This may fail if the
+    // mutator updates it before us, but that is OK. This is achieved
+    // using a strong compare-and-set (CAS) operation with relaxed
+    // memory synchronization ordering, where the expected value is
+    // the old reference and the desired value is the new reference.
+
+    // Convenience aliases.
+    GpuRegister base = obj_;
+    GpuRegister offset = field_offset_.AsRegister<GpuRegister>();
+    GpuRegister expected = temp1_;
+    GpuRegister value = ref_reg;
+    GpuRegister tmp_ptr = TMP;      // Pointer to actual memory.
+    GpuRegister tmp = AT;           // Value in memory.
+
+    __ Daddu(tmp_ptr, base, offset);
+
+    if (kPoisonHeapReferences) {
+      __ PoisonHeapReference(expected);
+      // Do not poison `value` if it is the same register as
+      // `expected`, which has just been poisoned.
+      if (value != expected) {
+        __ PoisonHeapReference(value);
+      }
+    }
+
+    // do {
+    //   tmp = [r_ptr] - expected;
+    // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
+
+    Mips64Label loop_head, exit_loop;
+    __ Bind(&loop_head);
+    __ Ll(tmp, tmp_ptr);
+    // The LL instruction sign-extends the 32-bit value, but
+    // 32-bit references must be zero-extended. Zero-extend `tmp`.
+    __ Dext(tmp, tmp, 0, 32);
+    __ Bnec(tmp, expected, &exit_loop);
+    __ Move(tmp, value);
+    __ Sc(tmp, tmp_ptr);
+    __ Beqzc(tmp, &loop_head);
+    __ Bind(&exit_loop);
+
+    if (kPoisonHeapReferences) {
+      __ UnpoisonHeapReference(expected);
+      // Do not unpoison `value` if it is the same register as
+      // `expected`, which has just been unpoisoned.
+      if (value != expected) {
+        __ UnpoisonHeapReference(value);
+      }
+    }
+
+    __ Bind(&done);
+    __ Bc(GetExitLabel());
+  }
+
+ private:
+  // The location (register) of the marked object reference.
+  const Location ref_;
+  // The register containing the object holding the marked object reference field.
+  const GpuRegister obj_;
+  // The location of the offset of the marked reference field within `obj_`.
+  Location field_offset_;
+
+  const GpuRegister temp1_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathMIPS64);
+};
+
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  ReadBarrierForHeapReferenceSlowPathMIPS64(HInstruction* instruction,
+                                            Location out,
+                                            Location ref,
+                                            Location obj,
+                                            uint32_t offset,
+                                            Location index)
+      : SlowPathCodeMIPS64(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial object
+    // has been overwritten by (or after) the heap object reference load
+    // to be instrumented, e.g.:
+    //
+    //   __ LoadFromOffset(kLoadWord, out, out, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    GpuRegister reg_out = out_.AsRegister<GpuRegister>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like the other "inputs" of this slow path), we
+    // introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        GpuRegister index_reg = index_.AsRegister<GpuRegister>();
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to art::mips64::Mips64Assembler::Sll and
+          // art::mips64::Mips64Assembler::Addiu32 below), but it has
+          // not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          GpuRegister free_reg = FindAvailableCallerSaveRegister(codegen);
+          __ Move(free_reg, index_reg);
+          index_reg = free_reg;
+          index = Location::RegisterLocation(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the scale
+        // factor (2) cannot overflow in practice, as the runtime is
+        // unable to allocate object arrays with a size larger than
+        // 2^26 - 1 (that is, 2^28 - 4 bytes).
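+        // ((2^26 - 1) references * 4 bytes per reference = 2^28 - 4 bytes.)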
+        __ Sll(index_reg, index_reg, TIMES_4);
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ Addiu32(index_reg, index_reg, offset_);
+      } else {
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegister());
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      __ LoadConst32(calling_convention.GetRegisterAt(2), offset_);
+    }
+    mips64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    mips64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+    __ Bc(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "ReadBarrierForHeapReferenceSlowPathMIPS64";
+  }
+
+ private:
+  GpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<size_t>(ref_.AsRegister<GpuRegister>());
+    size_t obj = static_cast<size_t>(obj_.AsRegister<GpuRegister>());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref &&
+          i != obj &&
+          !codegen->IsCoreCalleeSaveRegister(i) &&
+          !codegen->IsBlockedCoreRegister(i)) {
+        return static_cast<GpuRegister>(i);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on MIPS64
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free caller-save register";
+    UNREACHABLE();
+  }
+
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathMIPS64);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  ReadBarrierForRootSlowPathMIPS64(HInstruction* instruction, Location out, Location root)
+      : SlowPathCodeMIPS64(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    GpuRegister reg_out = out_.AsRegister<GpuRegister>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    mips64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                                 root_,
+                                 Primitive::kPrimNot);
+    mips64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    mips64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+    __ Bc(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS64"; }
+
+ private:
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathMIPS64);
+};
+
 CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
                                          const Mips64InstructionSetFeatures& isa_features,
                                          const CompilerOptions& compiler_options,
@@ -1140,23 +1662,32 @@
                                         uint32_t dex_pc,
                                         SlowPathCode* slow_path) {
   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
-  __ LoadFromOffset(kLoadDoubleword,
-                    T9,
-                    TR,
-                    GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value());
-  __ Jalr(T9);
-  __ Nop();
+  GenerateInvokeRuntime(GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value());
   if (EntrypointRequiresStackMap(entrypoint)) {
     RecordPcInfo(instruction, dex_pc, slow_path);
   }
 }
 
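+// Invoke a runtime entrypoint without recording a stack map: used by slow
+// paths, such as the read barrier mark slow paths above, for entrypoints
+// that do not require one.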
+void CodeGeneratorMIPS64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                              HInstruction* instruction,
+                                                              SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  GenerateInvokeRuntime(entry_point_offset);
+}
+
+void CodeGeneratorMIPS64::GenerateInvokeRuntime(int32_t entry_point_offset) {
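+  // T9 is the standard MIPS register for indirect calls (callees may use
+  // it to reconstruct $gp); the Nop fills the Jalr branch delay slot.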
+  __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
+  __ Jalr(T9);
+  __ Nop();
+}
+
 void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path,
                                                                       GpuRegister class_reg) {
   __ LoadFromOffset(kLoadWord, TMP, class_reg, mirror::Class::StatusOffset().Int32Value());
   __ LoadConst32(AT, mirror::Class::kStatusInitialized);
   __ Bltc(TMP, AT, slow_path->GetEntryLabel());
-  // TODO: barrier needed?
+  // Even if the initialized flag is set, we need to ensure consistent memory ordering.
+  __ Sync(0);
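+  // (Sync(0) is a full barrier, giving the load-acquire ordering needed to
+  // observe the stores made by the initializing thread.)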
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1447,14 +1978,31 @@
 }
 
 void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) {
+  Primitive::Type type = instruction->GetType();
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (type == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier
+                                                       ? LocationSummary::kCallOnSlowPath
+                                                       : LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  if (Primitive::IsFloatingPointType(instruction->GetType())) {
+  if (Primitive::IsFloatingPointType(type)) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object array get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // array's location, as we need it to emit the read barrier.
+    locations->SetOut(Location::RequiresRegister(),
+                      object_array_get_with_read_barrier
+                          ? Location::kOutputOverlap
+                          : Location::kNoOutputOverlap);
+  }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
@@ -1467,7 +2015,9 @@
 
 void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
+  Location out_loc = locations->Out();
   Location index = locations->InAt(1);
   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
   auto null_checker = GetImplicitNullChecker(instruction, codegen_);
@@ -1477,7 +2027,7 @@
                                         instruction->IsStringCharAt();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister out = out_loc.AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -1490,7 +2040,7 @@
     }
 
     case Primitive::kPrimByte: {
-      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister out = out_loc.AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -1503,7 +2053,7 @@
     }
 
     case Primitive::kPrimShort: {
-      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister out = out_loc.AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
@@ -1517,7 +2067,7 @@
     }
 
     case Primitive::kPrimChar: {
-      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister out = out_loc.AsRegister<GpuRegister>();
       if (maybe_compressed_char_at) {
         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
         __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker);
@@ -1570,10 +2120,9 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
+    case Primitive::kPrimInt: {
       DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
-      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister out = out_loc.AsRegister<GpuRegister>();
       LoadOperandType load_type = (type == Primitive::kPrimNot) ? kLoadUnsignedWord : kLoadWord;
       if (index.IsConstant()) {
         size_t offset =
@@ -1587,8 +2136,53 @@
       break;
     }
 
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
+                                                        out_loc,
+                                                        obj,
+                                                        data_offset,
+                                                        index,
+                                                        temp,
+                                                        /* needs_null_check */ true);
+      } else {
+        GpuRegister out = out_loc.AsRegister<GpuRegister>();
+        if (index.IsConstant()) {
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ LoadFromOffset(kLoadUnsignedWord, out, obj, offset, null_checker);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ Sll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
+          __ Addu(TMP, obj, TMP);
+          __ LoadFromOffset(kLoadUnsignedWord, out, TMP, data_offset, null_checker);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction,
+                                                 out_loc,
+                                                 out_loc,
+                                                 obj_loc,
+                                                 data_offset,
+                                                 index);
+        }
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
-      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister out = out_loc.AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -1602,7 +2196,7 @@
     }
 
     case Primitive::kPrimFloat: {
-      FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+      FpuRegister out = out_loc.AsFpuRegister<FpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -1616,7 +2210,7 @@
     }
 
     case Primitive::kPrimDouble: {
-      FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+      FpuRegister out = out_loc.AsFpuRegister<FpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -1633,11 +2227,6 @@
       LOG(FATAL) << "Unreachable type " << instruction->GetType();
       UNREACHABLE();
   }
-
-  if (type == Primitive::kPrimNot) {
-    GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-    __ MaybeUnpoisonHeapReference(out);
-  }
 }
 
 void LocationsBuilderMIPS64::VisitArrayLength(HArrayLength* instruction) {
@@ -1679,23 +2268,28 @@
 }
 
 void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) {
-  bool needs_runtime_call = instruction->NeedsTypeCheck();
+  Primitive::Type value_type = instruction->GetComponentType();
+
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
-  if (needs_runtime_call) {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+      may_need_runtime_call_for_type_check ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+    locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
   } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
-      locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
-    } else {
-      locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
-    }
+    locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
+  }
+  if (needs_write_barrier) {
+    // Temporary register for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
   }
 }
 
@@ -1705,7 +2299,7 @@
   Location index = locations->InAt(1);
   Location value_location = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool needs_runtime_call = locations->WillCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   auto null_checker = GetImplicitNullChecker(instruction, codegen_);
@@ -1749,68 +2343,138 @@
       break;
     }
 
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      if (index.IsConstant()) {
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+      } else {
+        __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+        __ Daddu(base_reg, obj, base_reg);
+      }
+      if (value_location.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        GpuRegister value = value_location.AsRegister<GpuRegister>();
+        __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+      }
+      break;
+    }
+
     case Primitive::kPrimNot: {
-      if (!needs_runtime_call) {
+      if (value_location.IsConstant()) {
+        // Just setting null.
         uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
         if (index.IsConstant()) {
           data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
         } else {
-          DCHECK(index.IsRegister()) << index;
           __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
           __ Daddu(base_reg, obj, base_reg);
         }
-        if (value_location.IsConstant()) {
-          int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
-          __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
-          DCHECK(!needs_write_barrier);
-        } else {
-          GpuRegister value = value_location.AsRegister<GpuRegister>();
-          if (kPoisonHeapReferences && needs_write_barrier) {
-            // Note that in the case where `value` is a null reference,
-            // we do not enter this block, as a null reference does not
-            // need poisoning.
-            DCHECK_EQ(value_type, Primitive::kPrimNot);
-            // Use Sw() instead of StoreToOffset() in order to be able to
-            // hold the poisoned reference in AT and thus avoid allocating
-            // yet another temporary register.
-            if (index.IsConstant()) {
-              if (!IsInt<16>(static_cast<int32_t>(data_offset))) {
-                int16_t low16 = Low16Bits(data_offset);
-                // For consistency with StoreToOffset() and such treat data_offset as int32_t.
-                uint64_t high48 = static_cast<uint64_t>(static_cast<int32_t>(data_offset)) - low16;
-                int16_t upper16 = High16Bits(high48);
-                // Allow the full [-2GB,+2GB) range in case `low16` is negative and needs a
-                // compensatory 64KB added, which may push `high48` above 2GB and require
-                // the dahi instruction.
-                int16_t higher16 = High32Bits(high48) + ((upper16 < 0) ? 1 : 0);
-                __ Daui(TMP, obj, upper16);
-                if (higher16 != 0) {
-                  __ Dahi(TMP, higher16);
-                }
-                base_reg = TMP;
-                data_offset = low16;
-              }
-            } else {
-              DCHECK(IsInt<16>(static_cast<int32_t>(data_offset)));
-            }
-            __ PoisonHeapReference(AT, value);
-            __ Sw(AT, base_reg, data_offset);
-            null_checker();
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        DCHECK_EQ(value, 0);
+        __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call_for_type_check);
+        break;
+      }
+
+      DCHECK(needs_write_barrier);
+      GpuRegister value = value_location.AsRegister<GpuRegister>();
+      GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>();
+      GpuRegister temp2 = TMP;  // Doesn't need to survive slow path.
+      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+      Mips64Label done;
+      SlowPathCodeMIPS64* slow_path = nullptr;
+
+      if (may_need_runtime_call_for_type_check) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathMIPS64(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          Mips64Label non_zero;
+          __ Bnezc(value, &non_zero);
+          uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+          if (index.IsConstant()) {
+            data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
           } else {
-            __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+            __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+            __ Daddu(base_reg, obj, base_reg);
           }
-          if (needs_write_barrier) {
-            DCHECK_EQ(value_type, Primitive::kPrimNot);
-            codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
-          }
+          __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+          __ Bc(&done);
+          __ Bind(&non_zero);
         }
+
+        // Note that when read barriers are enabled, the type checks
+        // are performed without read barriers.  This is fine, even in
+        // the case where a class object is in the from-space after
+        // the flip, as a comparison involving such a type would not
+        // produce a false positive; it may of course produce a false
+        // negative, in which case we would take the ArraySet slow
+        // path.
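+        // (Concretely: two distinct classes never compare equal, while a
+        // from-space and a to-space reference to the same class may
+        // compare unequal, which merely costs a trip to the slow path.)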
+
+        // /* HeapReference<Class> */ temp1 = obj->klass_
+        __ LoadFromOffset(kLoadUnsignedWord, temp1, obj, class_offset, null_checker);
+        __ MaybeUnpoisonHeapReference(temp1);
+
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ LoadFromOffset(kLoadUnsignedWord, temp1, temp1, component_offset);
+        // /* HeapReference<Class> */ temp2 = value->klass_
+        __ LoadFromOffset(kLoadUnsignedWord, temp2, value, class_offset);
+        // If heap poisoning is enabled, no need to unpoison `temp1`
+        // nor `temp2`, as we are comparing two poisoned references.
+
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          Mips64Label do_put;
+          __ Beqc(temp1, temp2, &do_put);
+          // If heap poisoning is enabled, the `temp1` reference has
+          // not been unpoisoned yet; unpoison it now.
+          __ MaybeUnpoisonHeapReference(temp1);
+
+          // /* HeapReference<Class> */ temp1 = temp1->super_class_
+          __ LoadFromOffset(kLoadUnsignedWord, temp1, temp1, super_offset);
+          // If heap poisoning is enabled, no need to unpoison
+          // `temp1`, as we are comparing against null below.
+          __ Bnezc(temp1, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ Bnec(temp1, temp2, slow_path->GetEntryLabel());
+        }
+      }
+
+      GpuRegister source = value;
+      if (kPoisonHeapReferences) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        __ Move(temp1, value);
+        __ PoisonHeapReference(temp1);
+        source = temp1;
+      }
+
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      if (index.IsConstant()) {
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
       } else {
-        DCHECK_EQ(value_type, Primitive::kPrimNot);
-        // Note: if heap poisoning is enabled, pAputObject takes care
-        // of poisoning the reference.
-        codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
-        CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+        __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+        __ Daddu(base_reg, obj, base_reg);
+      }
+      __ StoreToOffset(kStoreWord, source, base_reg, data_offset);
+
+      if (!may_need_runtime_call_for_type_check) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+
+      codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+
+      if (done.IsLinked()) {
+        __ Bind(&done);
+      }
+
+      if (slow_path != nullptr) {
+        __ Bind(slow_path->GetExitLabel());
       }
       break;
     }
@@ -1900,6 +2564,23 @@
   __ Bgeuc(index, length, slow_path->GetEntryLabel());
 }
 
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+  if (kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    return 1;
+  }
+  return 0;
+}
+
+// Extra temp is used for read barrier.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  return 1 + NumberOfInstanceOfTemps(type_check_kind);
+}
+
 void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
@@ -1910,7 +2591,7 @@
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier)
           ? LocationSummary::kCallOnSlowPath
           : LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
       break;
@@ -1924,15 +2605,20 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());
+  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
   GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
-  GpuRegister temp = locations->GetTemp(0).AsRegister<GpuRegister>();
+  Location temp_loc = locations->GetTemp(0);
+  GpuRegister temp = temp_loc.AsRegister<GpuRegister>();
+  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+  DCHECK_LE(num_temps, 2u);
+  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -1969,8 +2655,12 @@
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
       __ Bnec(temp, cls, slow_path->GetEntryLabel());
@@ -1979,15 +2669,22 @@
 
     case TypeCheckKind::kAbstractClassCheck: {
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       Mips64Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, temp, super_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
       // If the class reference currently in `temp` is null, jump to the slow path to throw the
       // exception.
       __ Beqzc(temp, slow_path->GetEntryLabel());
@@ -1998,15 +2695,22 @@
 
     case TypeCheckKind::kClassHierarchyCheck: {
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // Walk over the class hierarchy to find a match.
       Mips64Label loop;
       __ Bind(&loop);
       __ Beqc(temp, cls, &done);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, temp, super_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
       // If the class reference currently in `temp` is null, jump to the slow path to throw the
       // exception. Otherwise, jump to the beginning of the loop.
       __ Bnezc(temp, &loop);
@@ -2016,14 +2720,21 @@
 
     case TypeCheckKind::kArrayObjectCheck: {
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // Do an exact check.
       __ Beqc(temp, cls, &done);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, temp, component_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
       // If the component type is null, jump to the slow path to throw the exception.
       __ Beqzc(temp, slow_path->GetEntryLabel());
       // Otherwise, the object is indeed an array, further check that this component
@@ -2050,11 +2761,19 @@
       // Avoid read barriers to improve performance of the fast path. We can not get false
       // positives by doing this.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // /* HeapReference<Class> */ temp = temp->iftable_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, temp, iftable_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        temp_loc,
+                                        iftable_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // Iftable is never null.
       __ Lw(TMP, temp, array_length_offset);
       // Loop through the iftable and check if any class matches.
@@ -3270,14 +3989,31 @@
 }
 
 void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
-                                            const FieldInfo& field_info ATTRIBUTE_UNUSED) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+                                            const FieldInfo& field_info) {
+  Primitive::Type field_type = field_info.GetFieldType();
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (field_type == Primitive::kPrimNot);
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction,
+      object_field_get_with_read_barrier
+          ? LocationSummary::kCallOnSlowPath
+          : LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object field get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // object's location, as we need it to emit the read barrier.
+    locations->SetOut(Location::RequiresRegister(),
+                      object_field_get_with_read_barrier
+                          ? Location::kOutputOverlap
+                          : Location::kNoOutputOverlap);
+  }
+  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
@@ -3285,8 +4021,11 @@
                                                     const FieldInfo& field_info) {
   Primitive::Type type = field_info.GetFieldType();
   LocationSummary* locations = instruction->GetLocations();
-  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
+  Location dst_loc = locations->Out();
   LoadOperandType load_type = kLoadUnsignedByte;
+  bool is_volatile = field_info.IsVolatile();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   auto null_checker = GetImplicitNullChecker(instruction, codegen_);
 
@@ -3319,19 +4058,46 @@
       UNREACHABLE();
   }
   if (!Primitive::IsFloatingPointType(type)) {
-    DCHECK(locations->Out().IsRegister());
-    GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-    __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
+    DCHECK(dst_loc.IsRegister());
+    GpuRegister dst = dst_loc.AsRegister<GpuRegister>();
+    if (type == Primitive::kPrimNot) {
+      // /* HeapReference<Object> */ dst = *(obj + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                        dst_loc,
+                                                        obj,
+                                                        offset,
+                                                        temp_loc,
+                                                        /* needs_null_check */ true);
+        if (is_volatile) {
+          GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ LoadFromOffset(kLoadUnsignedWord, dst, obj, offset, null_checker);
+        if (is_volatile) {
+          GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset);
+      }
+    } else {
+      __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
+    }
   } else {
-    DCHECK(locations->Out().IsFpuRegister());
-    FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+    DCHECK(dst_loc.IsFpuRegister());
+    FpuRegister dst = dst_loc.AsFpuRegister<FpuRegister>();
     __ LoadFpuFromOffset(load_type, dst, obj, offset, null_checker);
   }
-  // TODO: memory barrier?
 
-  if (type == Primitive::kPrimNot) {
-    GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-    __ MaybeUnpoisonHeapReference(dst);
+  // Memory barriers, in the case of references, are handled above in
+  // the kPrimNot case.
+  if (is_volatile && (type != Primitive::kPrimNot)) {
+    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   }
 }
 
@@ -3355,6 +4121,7 @@
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   Location value_location = locations->InAt(1);
   StoreOperandType store_type = kStoreByte;
+  bool is_volatile = field_info.IsVolatile();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1));
   auto null_checker = GetImplicitNullChecker(instruction, codegen_);
@@ -3382,6 +4149,10 @@
       UNREACHABLE();
   }
 
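+  // For a volatile store, emit an any->store barrier before the store and
+  // a full any->any barrier after it (see below), matching the usual
+  // volatile-store protocol.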
+  if (is_volatile) {
+    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+  }
+
   if (value_location.IsConstant()) {
     int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
     __ StoreConstToOffset(store_type, value, obj, offset, TMP, null_checker);
@@ -3405,12 +4176,16 @@
       __ StoreFpuToOffset(store_type, src, obj, offset, null_checker);
     }
   }
-  // TODO: memory barriers?
+
   if (needs_write_barrier) {
     DCHECK(value_location.IsRegister());
     GpuRegister src = value_location.AsRegister<GpuRegister>();
     codegen_->MarkGCCard(obj, src, value_can_be_null);
   }
+
+  if (is_volatile) {
+    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+  }
 }
 
 void LocationsBuilderMIPS64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -3429,14 +4204,134 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
+void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
+  GpuRegister out_reg = out.AsRegister<GpuRegister>();
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
+    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      out_reg,
+                                                      offset,
+                                                      maybe_temp,
+                                                      /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `maybe_temp` before overwriting it
+      // in the following load operation, as we will need it for the
+      // read barrier below.
+      __ Move(maybe_temp.AsRegister<GpuRegister>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ LoadFromOffset(kLoadUnsignedWord, out_reg, out_reg, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ LoadFromOffset(kLoadUnsignedWord, out_reg, out_reg, offset);
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
+  GpuRegister out_reg = out.AsRegister<GpuRegister>();
+  GpuRegister obj_reg = obj.AsRegister<GpuRegister>();
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
+    if (kUseBakerReadBarrier) {
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      obj_reg,
+                                                      offset,
+                                                      maybe_temp,
+                                                      /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ LoadFromOffset(kLoadUnsignedWord, out_reg, obj_reg, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ LoadFromOffset(kLoadUnsignedWord, out_reg, obj_reg, offset);
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
 void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(
-    HInstruction* instruction ATTRIBUTE_UNUSED,
+    HInstruction* instruction,
     Location root,
     GpuRegister obj,
-    uint32_t offset) {
+    uint32_t offset,
+    ReadBarrierOption read_barrier_option) {
   GpuRegister root_reg = root.AsRegister<GpuRegister>();
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
+  if (read_barrier_option == kWithReadBarrier) {
+    DCHECK(kEmitCompilerReadBarrier);
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+      //
+      //   root = obj.field;
+      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      //   if (temp != null) {
+      //     root = temp(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset);
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path marking the GC root `root`.
+      Location temp = Location::RegisterLocation(T9);
+      SlowPathCodeMIPS64* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64(
+              instruction,
+              root,
+              /*entrypoint*/ temp);
+      codegen_->AddSlowPath(slow_path);
+
+      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1);
+      // Loading the entrypoint does not require a load acquire since it is only changed when
+      // threads are suspended or running a checkpoint.
+      __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset);
+      // The entrypoint is null when the GC is not marking; this prevents one
+      // load compared to checking GetIsGcMarking.
+      __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ Daddiu64(root_reg, obj, static_cast<int32_t>(offset));
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
   } else {
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
@@ -3446,6 +4341,219 @@
   }
 }
 
+void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                GpuRegister obj,
+                                                                uint32_t offset,
+                                                                Location temp,
+                                                                bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Location no_index = Location::NoLocation();
+  ScaleFactor no_scale_factor = TIMES_1;
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            offset,
+                                            no_index,
+                                            no_scale_factor,
+                                            temp,
+                                            needs_null_check);
+}
+
+void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                GpuRegister obj,
+                                                                uint32_t data_offset,
+                                                                Location index,
+                                                                Location temp,
+                                                                bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  ScaleFactor scale_factor = TIMES_4;
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            data_offset,
+                                            index,
+                                            scale_factor,
+                                            temp,
+                                            needs_null_check);
+}
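
Both wrappers above funnel into GenerateReferenceLoadWithBakerReadBarrier;
they differ only in how the reference address is formed (a field load uses a
plain offset, an array load adds a scaled index, where TIMES_4 denotes a shift
of 2). A hedged sketch of that arithmetic, with an illustrative helper name:

    #include <cstdint>

    // Field load:  addr = obj + offset                      (TIMES_1, shift 0)
    // Array load:  addr = obj + data_offset + (index << 2)  (TIMES_4, shift 2)
    uintptr_t ReferenceAddress(uintptr_t obj, uint32_t offset,
                               uintptr_t index, int scale_shift) {
      return obj + offset + (index << scale_shift);
    }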
+
+void CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                    Location ref,
+                                                                    GpuRegister obj,
+                                                                    uint32_t offset,
+                                                                    Location index,
+                                                                    ScaleFactor scale_factor,
+                                                                    Location temp,
+                                                                    bool needs_null_check,
+                                                                    bool always_update_field) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
+
+  GpuRegister ref_reg = ref.AsRegister<GpuRegister>();
+  GpuRegister temp_reg = temp.AsRegister<GpuRegister>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+
+  __ Sync(0);  // Barrier to prevent load-load reordering.
+
+  // The actual reference load.
+  if (index.IsValid()) {
+    // Load types involving an "index": ArrayGet,
+    // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+    // intrinsics.
+    // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+    if (index.IsConstant()) {
+      size_t computed_offset =
+          (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
+      __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, computed_offset);
+    } else {
+      GpuRegister index_reg = index.AsRegister<GpuRegister>();
+      __ Dsll(TMP, index_reg, scale_factor);
+      __ Daddu(TMP, obj, TMP);
+      __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset);
+    }
+  } else {
+    // /* HeapReference<Object> */ ref = *(obj + offset)
+    __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset);
+  }
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path marking the object `ref` when it is gray.
+  SlowPathCodeMIPS64* slow_path;
+  if (always_update_field) {
+    // ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 only supports addresses
+    // of the form `obj + field_offset`, where `obj` is a register and
+    // `field_offset` is a register. Thus `offset` is expected to be zero
+    // and `scale_factor` to be TIMES_1 in this code path.
+    DCHECK_EQ(offset, 0u);
+    DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+    slow_path = new (GetGraph()->GetArena())
+        ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(instruction,
+                                                    ref,
+                                                    obj,
+                                                    /* field_offset */ index,
+                                                    temp_reg);
+  } else {
+    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64(instruction, ref);
+  }
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::GrayState())
+  //   ref = ReadBarrier::Mark(ref);
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit into the sign bit (31) and
+  // performing a branch on less than zero.
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+  static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size");
+  __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift);
+  __ Bltzc(temp_reg, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
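
The Sll/Bltzc pair above implements the gray check without a compare:
shifting the one-bit rb_state into bit 31 turns "state == gray (1)" into a
branch on a negative word. The same test in standalone C++ (kRbStateShift is
an illustrative constant standing in for LockWord::kReadBarrierStateShift):

    #include <cstdint>

    constexpr int kRbStateShift = 28;  // Illustrative value, not ART's.

    bool IsGray(uint32_t monitor_word) {
      // Sll temp, temp, 31 - kRbStateShift; Bltzc temp, slow_path
      int32_t shifted = static_cast<int32_t>(monitor_word << (31 - kRbStateShift));
      return shifted < 0;  // The rb_state bit landed in the sign bit.
    }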
+
+void CodeGeneratorMIPS64::GenerateReadBarrierSlow(HInstruction* instruction,
+                                                  Location out,
+                                                  Location ref,
+                                                  Location obj,
+                                                  uint32_t offset,
+                                                  Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the reference load.
+  //
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathMIPS64(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  __ Bc(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorMIPS64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                       Location out,
+                                                       Location ref,
+                                                       Location obj,
+                                                       uint32_t offset,
+                                                       Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    __ UnpoisonHeapReference(out.AsRegister<GpuRegister>());
+  }
+}
+
+void CodeGeneratorMIPS64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                         Location out,
+                                                         Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCodeMIPS64* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathMIPS64(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  __ Bc(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -3454,7 +4562,8 @@
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
     case TypeCheckKind::kUnresolvedCheck:
@@ -3469,14 +4578,20 @@
   // The output does overlap inputs.
   // Note that TypeCheckSlowPathMIPS64 uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
   GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
-  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  Location out_loc = locations->Out();
+  GpuRegister out = out_loc.AsRegister<GpuRegister>();
+  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+  DCHECK_LE(num_temps, 1u);
+  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -3494,8 +4609,12 @@
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Classes must be equal for the instanceof to succeed.
       __ Xor(out, out, cls);
       __ Sltiu(out, out, 1);
@@ -3504,15 +4623,22 @@
 
     case TypeCheckKind::kAbstractClassCheck: {
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       Mips64Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      __ LoadFromOffset(kLoadUnsignedWord, out, out, super_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Beqzc(out, &done);
       __ Bnec(out, cls, &loop);
@@ -3522,15 +4648,22 @@
 
     case TypeCheckKind::kClassHierarchyCheck: {
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Walk over the class hierarchy to find a match.
       Mips64Label loop, success;
       __ Bind(&loop);
       __ Beqc(out, cls, &success);
       // /* HeapReference<Class> */ out = out->super_class_
-      __ LoadFromOffset(kLoadUnsignedWord, out, out, super_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ Bnezc(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Bc(&done);
@@ -3541,15 +4674,22 @@
 
     case TypeCheckKind::kArrayObjectCheck: {
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Do an exact check.
       Mips64Label success;
       __ Beqc(out, cls, &success);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      __ LoadFromOffset(kLoadUnsignedWord, out, out, component_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Beqzc(out, &done);
       __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -3564,8 +4704,12 @@
     case TypeCheckKind::kArrayCheck: {
       // No read barrier since the slow path will retry upon failure.
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
                                                                        /* is_fatal */ false);
@@ -3735,9 +4879,6 @@
 
 HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
-  }
   bool fallback_load = false;
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
@@ -3765,9 +4906,6 @@
 
 HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
     HLoadClass::LoadKind desired_class_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
-  }
   bool fallback_load = false;
   switch (desired_class_load_kind) {
     case HLoadClass::LoadKind::kInvalid:
@@ -3960,7 +5098,8 @@
   }
   DCHECK(!cls->NeedsAccessCheck());
 
-  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
@@ -3989,6 +5128,9 @@
       current_method_reg = locations->InAt(0).AsRegister<GpuRegister>();
   }
 
+  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+      ? kWithoutReadBarrier
+      : kCompilerReadBarrierOption;
   bool generate_null_check = false;
   switch (load_kind) {
     case HLoadClass::LoadKind::kReferrersClass:
@@ -3998,10 +5140,12 @@
       GenerateGcRootFieldLoad(cls,
                               out_loc,
                               current_method_reg,
-                              ArtMethod::DeclaringClassOffset().Int32Value());
+                              ArtMethod::DeclaringClassOffset().Int32Value(),
+                              read_barrier_option);
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ LoadLiteral(out,
                      kLoadUnsignedWord,
                      codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
@@ -4009,6 +5153,7 @@
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
@@ -4016,7 +5161,7 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
-      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       uint32_t address = dchecked_integral_cast<uint32_t>(
           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
       DCHECK_NE(address, 0u);
@@ -4029,7 +5174,7 @@
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
-      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
       generate_null_check = true;
       break;
     }
@@ -4039,7 +5184,7 @@
                      codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
                                                           cls->GetTypeIndex(),
                                                           cls->GetClass()));
-      GenerateGcRootFieldLoad(cls, out_loc, out, 0);
+      GenerateGcRootFieldLoad(cls, out_loc, out, 0, read_barrier_option);
       break;
     case HLoadClass::LoadKind::kDexCacheViaMethod:
     case HLoadClass::LoadKind::kInvalid:
@@ -4136,7 +5281,11 @@
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
-      GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(load,
+                              out_loc,
+                              out,
+                              /* placeholder */ 0x5678,
+                              kCompilerReadBarrierOption);
       SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
       codegen_->AddSlowPath(slow_path);
       __ Beqzc(out, slow_path->GetEntryLabel());
@@ -4149,7 +5298,7 @@
                      codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
                                                            load->GetStringIndex(),
                                                            load->GetString()));
-      GenerateGcRootFieldLoad(load, out_loc, out, 0);
+      GenerateGcRootFieldLoad(load, out_loc, out, 0, kCompilerReadBarrierOption);
       return;
     default:
       break;
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 6040dc9..fd1a174 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -237,6 +237,38 @@
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location maybe_temp,
+                                         ReadBarrierOption read_barrier_option);
+
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
@@ -245,7 +277,9 @@
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                GpuRegister obj,
-                               uint32_t offset);
+                               uint32_t offset,
+                               ReadBarrierOption read_barrier_option);
+
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
                              Mips64Label* true_target,
@@ -316,6 +350,91 @@
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
 
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             GpuRegister obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             GpuRegister obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+  //
+  // Load the object reference located at the address
+  // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+  // `ref`, and mark it if needed.
+  //
+  // If `always_update_field` is true, the value of the reference is
+  // atomically updated in the holder (`obj`).
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 GpuRegister obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 ScaleFactor scale_factor,
+                                                 Location temp,
+                                                 bool needs_null_check,
+                                                 bool always_update_field = false);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
   void MarkGCCard(GpuRegister object, GpuRegister value, bool value_can_be_null);
 
   // Register allocation.
@@ -366,6 +485,14 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path = nullptr) OVERRIDE;
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
+  void GenerateInvokeRuntime(int32_t entry_point_offset);
+
   ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
 
   bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; }
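
As the GenerateReadBarrierSlow comment above notes, when `index` is provided
the offset handed to artReadBarrierSlow is adjusted to fold the index in. For
a constant array index the adjustment amounts to the following (AdjustedOffset
is an illustrative helper, not ART code; HeapReference<mirror::Object> is 4
bytes, per the static_assert earlier in this change):

    #include <cstdint>

    uint32_t AdjustedOffset(uint32_t data_offset, uint32_t index) {
      return data_offset + index * 4u;  // index * sizeof(HeapReference<Object>)
    }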
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 6d3702f..49f099f 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -3660,7 +3660,7 @@
 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
   DCHECK(instruction->IsDiv() || instruction->IsRem());
   Primitive::Type type = instruction->GetResultType();
-  DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong);
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
   bool is_div = instruction->IsDiv();
   LocationSummary* locations = instruction->GetLocations();
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 19f668d..e1cf248 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -672,6 +672,32 @@
   return result;
 }
 
+static ArtMethod* ResolveMethodFromInlineCache(Handle<mirror::Class> klass,
+                                               ArtMethod* resolved_method,
+                                               HInstruction* invoke_instruction,
+                                               PointerSize pointer_size)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (Runtime::Current()->IsAotCompiler()) {
+    // We can get unrelated types when working with profiles (corruption,
+    // system updates, or the file simply being writable by anyone). So first
+    // check that the class is actually assignable to the declaring class of
+    // the method being called in bytecode.
+    // Note: the lookup methods used below require assignable types.
+    if (!resolved_method->GetDeclaringClass()->IsAssignableFrom(klass.Get())) {
+      return nullptr;
+    }
+  }
+
+  if (invoke_instruction->IsInvokeInterface()) {
+    resolved_method = klass->FindVirtualMethodForInterface(resolved_method, pointer_size);
+  } else {
+    DCHECK(invoke_instruction->IsInvokeVirtual());
+    resolved_method = klass->FindVirtualMethodForVirtual(resolved_method, pointer_size);
+  }
+  DCHECK(resolved_method != nullptr);
+  return resolved_method;
+}
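
A corrupted or stale profile can name a receiver class unrelated to the
method's declaring class, and the FindVirtualMethodFor* lookups assume
assignable types, so the guard has to run first. A standalone model of that
guard with illustrative types (Class, Method and ResolveFromProfile are not
ART's; real assignability also walks interfaces):

    struct Class {
      const Class* super = nullptr;
      bool IsAssignableFrom(const Class* k) const {
        for (; k != nullptr; k = k->super) {
          if (k == this) return true;
        }
        return false;
      }
    };

    struct Method { const Class* declaring_class; };

    const Method* ResolveFromProfile(const Class* profiled_klass, const Method* m) {
      if (!m->declaring_class->IsAssignableFrom(profiled_klass)) {
        return nullptr;  // Bogus profile entry: bail instead of feeding the lookup bad types.
      }
      return m;  // The real code continues with the vtable/iftable lookup here.
    }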
+
 bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
                                         Handle<mirror::ObjectArray<mirror::Class>> classes) {
@@ -690,20 +716,20 @@
 
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   PointerSize pointer_size = class_linker->GetImagePointerSize();
-  if (invoke_instruction->IsInvokeInterface()) {
-    resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForInterface(
-        resolved_method, pointer_size);
-  } else {
-    DCHECK(invoke_instruction->IsInvokeVirtual());
-    resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForVirtual(
-        resolved_method, pointer_size);
-  }
+  Handle<mirror::Class> monomorphic_type = handles_->NewHandle(GetMonomorphicType(classes));
+  resolved_method = ResolveMethodFromInlineCache(
+      monomorphic_type, resolved_method, invoke_instruction, pointer_size);
+
   LOG_NOTE() << "Try inline monomorphic call to " << resolved_method->PrettyMethod();
-  DCHECK(resolved_method != nullptr);
+  if (resolved_method == nullptr) {
+    // Bogus AOT profile, bail.
+    DCHECK(Runtime::Current()->IsAotCompiler());
+    return false;
+  }
+
   HInstruction* receiver = invoke_instruction->InputAt(0);
   HInstruction* cursor = invoke_instruction->GetPrevious();
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
-  Handle<mirror::Class> monomorphic_type = handles_->NewHandle(GetMonomorphicType(classes));
   if (!TryInlineAndReplace(invoke_instruction,
                            resolved_method,
                            ReferenceTypeInfo::Create(monomorphic_type, /* is_exact */ true),
@@ -843,11 +869,14 @@
     ArtMethod* method = nullptr;
 
     Handle<mirror::Class> handle = handles_->NewHandle(classes->Get(i));
-    if (invoke_instruction->IsInvokeInterface()) {
-      method = handle->FindVirtualMethodForInterface(resolved_method, pointer_size);
-    } else {
-      DCHECK(invoke_instruction->IsInvokeVirtual());
-      method = handle->FindVirtualMethodForVirtual(resolved_method, pointer_size);
+    method = ResolveMethodFromInlineCache(
+        handle, resolved_method, invoke_instruction, pointer_size);
+    if (method == nullptr) {
+      DCHECK(Runtime::Current()->IsAotCompiler());
+      // The AOT profile is bogus. This loop expects to iterate over all
+      // entries, so just continue.
+      all_targets_inlined = false;
+      continue;
     }
 
     HInstruction* receiver = invoke_instruction->InputAt(0);
@@ -892,7 +921,7 @@
         }
         invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
         // Because the inline cache data can be populated concurrently, we force the end of the
-        // iteration. Otherhwise, we could see a new receiver type.
+        // iteration. Otherwise, we could see a new receiver type.
         break;
       } else {
         CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index bf85b19..b67793c 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1514,21 +1514,31 @@
                     Thread::PeerOffset<kMipsPointerSize>().Int32Value());
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  bool can_call =
-       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
-       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile;
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           can_call ?
-                                                               LocationSummary::kCallOnSlowPath :
-                                                               LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(),
+                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
 static void GenUnsafeGet(HInvoke* invoke,
                          Primitive::Type type,
                          bool is_volatile,
@@ -1539,49 +1549,109 @@
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot)) << type;
   MipsAssembler* assembler = codegen->GetAssembler();
+  // Target register.
+  Location trg_loc = locations->Out();
   // Object pointer.
-  Register base = locations->InAt(1).AsRegister<Register>();
+  Location base_loc = locations->InAt(1);
+  Register base = base_loc.AsRegister<Register>();
   // The "offset" argument is passed as a "long". Since this code is for
   // a 32-bit processor, we can only use 32-bit addresses, so we only
   // need the low 32-bits of offset.
-  Register offset_lo = invoke->GetLocations()->InAt(2).AsRegisterPairLow<Register>();
+  Location offset_loc = locations->InAt(2);
+  Register offset_lo = offset_loc.AsRegisterPairLow<Register>();
 
-  __ Addu(TMP, base, offset_lo);
-  if (is_volatile) {
-    __ Sync(0);
+  if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == Primitive::kPrimNot))) {
+    __ Addu(TMP, base, offset_lo);
   }
-  if (type == Primitive::kPrimLong) {
-    Register trg_lo = locations->Out().AsRegisterPairLow<Register>();
-    Register trg_hi = locations->Out().AsRegisterPairHigh<Register>();
 
-    if (is_R6) {
-      __ Lw(trg_lo, TMP, 0);
-      __ Lw(trg_hi, TMP, 4);
-    } else {
-      __ Lwr(trg_lo, TMP, 0);
-      __ Lwl(trg_lo, TMP, 3);
-      __ Lwr(trg_hi, TMP, 4);
-      __ Lwl(trg_hi, TMP, 7);
-    }
-  } else {
-    Register trg = locations->Out().AsRegister<Register>();
-
-    if (is_R6) {
-      __ Lw(trg, TMP, 0);
-    } else {
-      __ Lwr(trg, TMP, 0);
-      __ Lwl(trg, TMP, 3);
+  switch (type) {
+    case Primitive::kPrimLong: {
+      Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
+      Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
+      CHECK(!is_volatile);  // TODO: support atomic 8-byte volatile loads.
+      if (is_R6) {
+        __ Lw(trg_lo, TMP, 0);
+        __ Lw(trg_hi, TMP, 4);
+      } else {
+        __ Lwr(trg_lo, TMP, 0);
+        __ Lwl(trg_lo, TMP, 3);
+        __ Lwr(trg_hi, TMP, 4);
+        __ Lwl(trg_hi, TMP, 7);
+      }
+      break;
     }
 
-    if (type == Primitive::kPrimNot) {
-      __ MaybeUnpoisonHeapReference(trg);
+    case Primitive::kPrimInt: {
+      Register trg = trg_loc.AsRegister<Register>();
+      if (is_R6) {
+        __ Lw(trg, TMP, 0);
+      } else {
+        __ Lwr(trg, TMP, 0);
+        __ Lwl(trg, TMP, 3);
+      }
+      if (is_volatile) {
+        __ Sync(0);
+      }
+      break;
     }
+
+    case Primitive::kPrimNot: {
+      Register trg = trg_loc.AsRegister<Register>();
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+                                                             trg_loc,
+                                                             base,
+                                                             /* offset */ 0U,
+                                                             /* index */ offset_loc,
+                                                             TIMES_1,
+                                                             temp,
+                                                             /* needs_null_check */ false);
+          if (is_volatile) {
+            __ Sync(0);
+          }
+        } else {
+          if (is_R6) {
+            __ Lw(trg, TMP, 0);
+          } else {
+            __ Lwr(trg, TMP, 0);
+            __ Lwl(trg, TMP, 3);
+          }
+          if (is_volatile) {
+            __ Sync(0);
+          }
+          codegen->GenerateReadBarrierSlow(invoke,
+                                           trg_loc,
+                                           trg_loc,
+                                           base_loc,
+                                           /* offset */ 0U,
+                                           /* index */ offset_loc);
+        }
+      } else {
+        if (is_R6) {
+          __ Lw(trg, TMP, 0);
+        } else {
+          __ Lwr(trg, TMP, 0);
+          __ Lwl(trg, TMP, 3);
+        }
+        if (is_volatile) {
+          __ Sync(0);
+        }
+        __ MaybeUnpoisonHeapReference(trg);
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected type " << type;
+      UNREACHABLE();
   }
 }
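
On pre-R6 MIPS the Lwr/Lwl pair above assembles a 32-bit value from a possibly
unaligned address; R6 dropped those instructions and instead lets Lw tolerate
misalignment. What the pair accomplishes, modeled in portable C++
(LoadUnaligned32 is an illustrative helper):

    #include <cstdint>
    #include <cstring>

    uint32_t LoadUnaligned32(const void* addr) {
      uint32_t value;
      std::memcpy(&value, addr, sizeof(value));  // Byte-wise copy, alignment-safe.
      return value;
    }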
 
 // int sun.misc.Unsafe.getInt(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) {
@@ -1590,7 +1660,7 @@
 
 // int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
@@ -1599,25 +1669,16 @@
 
 // long sun.misc.Unsafe.getLong(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLong(HInvoke* invoke) {
   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, IsR6(), codegen_);
 }
 
-// long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
-void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, IsR6(), codegen_);
-}
-
 // Object sun.misc.Unsafe.getObject(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
@@ -1626,7 +1687,7 @@
 
 // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
@@ -1643,6 +1704,8 @@
   locations->SetInAt(3, Location::RequiresRegister());
 }
 
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
 static void GenUnsafePut(LocationSummary* locations,
                          Primitive::Type type,
                          bool is_volatile,
@@ -1681,7 +1744,7 @@
   } else {
     Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>();
     Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>();
-
+    CHECK(!is_volatile);  // TODO: support atomic 8-byte volatile stores.
     if (is_R6) {
       __ Sw(value_lo, TMP, 0);
       __ Sw(value_hi, TMP, 4);
@@ -1815,50 +1878,71 @@
                codegen_);
 }
 
-// void sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x)
-void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(),
-               Primitive::kPrimLong,
-               /* is_volatile */ true,
-               /* is_ordered */ false,
-               IsR6(),
-               codegen_);
-}
-
-static void CreateIntIntIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      kUseBakerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
-
   locations->SetOut(Location::RequiresRegister());
+
+  // Temporary register used by the (Baker) read barrier in the CAS sequence.
+  if (can_call) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS* codegen) {
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS* codegen) {
   MipsAssembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
   bool isR6 = codegen->GetInstructionSetFeatures().IsR6();
   Register base = locations->InAt(1).AsRegister<Register>();
-  Register offset_lo = locations->InAt(2).AsRegisterPairLow<Register>();
+  Location offset_loc = locations->InAt(2);
+  Register offset_lo = offset_loc.AsRegisterPairLow<Register>();
   Register expected = locations->InAt(3).AsRegister<Register>();
   Register value = locations->InAt(4).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
 
   DCHECK_NE(base, out);
   DCHECK_NE(offset_lo, out);
   DCHECK_NE(expected, out);
 
   if (type == Primitive::kPrimNot) {
-    // Mark card for object assuming new value is stored.
+    // The only read barrier implementation supporting the
+    // UnsafeCASObject intrinsic is the Baker-style read barrier.
+    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
+    // object and scan the receiver at the next GC for nothing.
     bool value_can_be_null = true;  // TODO: Worth finding out this information?
     codegen->MarkGCCard(base, value, value_can_be_null);
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      Location temp = locations->GetTemp(0);
+      // Need to make sure the reference stored in the field is a to-space
+      // one before attempting the CAS or the CAS could fail incorrectly.
+      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+          invoke,
+          out_loc,  // Unused as an output; serves only as a "temporary" within the read barrier.
+          base,
+          /* offset */ 0u,
+          /* index */ offset_loc,
+          ScaleFactor::TIMES_1,
+          temp,
+          /* needs_null_check */ false,
+          /* always_update_field */ true);
+    }
   }
 
   MipsLabel loop_head, exit_loop;
@@ -1926,20 +2010,30 @@
 
 // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASInt(HInvoke* invoke) {
-  CreateIntIntIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASInt(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+  GenCas(invoke, Primitive::kPrimInt, codegen_);
 }
 
 // boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASObject(HInvoke* invoke) {
-  CreateIntIntIntIntIntToIntLocations(arena_, invoke);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barrier.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barrier.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  GenCas(invoke, Primitive::kPrimNot, codegen_);
 }
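
The "fail incorrectly" case GenCas guards against: with a concurrent copying
collector, `expected` is a to-space pointer while the field may still hold the
from-space address of the same object, so a raw CAS compares unequal addresses
and fails even though the field logically holds `expected`. A hedged
standalone model (ToSpace is an illustrative stand-in for the mark/forwarding
step):

    #include <atomic>

    struct Object;

    Object* ToSpace(Object* ref) { return ref; }  // Stub: identity when the GC is not copying.

    bool CasObjectField(std::atomic<Object*>* field, Object* expected, Object* value) {
      Object* current = field->load(std::memory_order_relaxed);
      Object* forwarded = ToSpace(current);
      if (forwarded != current) {
        // Heal the field to the to-space reference first, as the call to
        // GenerateReferenceLoadWithBakerReadBarrier with always_update_field does.
        field->compare_exchange_strong(current, forwarded);
      }
      return field->compare_exchange_strong(expected, value);
    }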
 
 // int java.lang.String.compareTo(String anotherString)
@@ -2664,6 +2758,8 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, MathFloor)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRint)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetLongVolatile)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafePutLongVolatile)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong)
 
 UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 1ee89cf..6098767 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1151,16 +1151,31 @@
                     Thread::PeerOffset<kMips64PointerSize>().Int32Value());
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(),
+                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
 static void GenUnsafeGet(HInvoke* invoke,
                          Primitive::Type type,
                          bool is_volatile,
@@ -1168,30 +1183,71 @@
   LocationSummary* locations = invoke->GetLocations();
   DCHECK((type == Primitive::kPrimInt) ||
          (type == Primitive::kPrimLong) ||
-         (type == Primitive::kPrimNot));
+         (type == Primitive::kPrimNot)) << type;
   Mips64Assembler* assembler = codegen->GetAssembler();
+  // Target register.
+  Location trg_loc = locations->Out();
+  GpuRegister trg = trg_loc.AsRegister<GpuRegister>();
   // Object pointer.
-  GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
+  Location base_loc = locations->InAt(1);
+  GpuRegister base = base_loc.AsRegister<GpuRegister>();
   // Long offset.
-  GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>();
-  GpuRegister trg = locations->Out().AsRegister<GpuRegister>();
+  Location offset_loc = locations->InAt(2);
+  GpuRegister offset = offset_loc.AsRegister<GpuRegister>();
 
-  __ Daddu(TMP, base, offset);
-  if (is_volatile) {
-    __ Sync(0);
+  if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == Primitive::kPrimNot))) {
+    __ Daddu(TMP, base, offset);
   }
+
   switch (type) {
+    case Primitive::kPrimLong:
+      __ Ld(trg, TMP, 0);
+      if (is_volatile) {
+        __ Sync(0);
+      }
+      break;
+
     case Primitive::kPrimInt:
       __ Lw(trg, TMP, 0);
+      if (is_volatile) {
+        __ Sync(0);
+      }
       break;
 
     case Primitive::kPrimNot:
-      __ Lwu(trg, TMP, 0);
-      __ MaybeUnpoisonHeapReference(trg);
-      break;
-
-    case Primitive::kPrimLong:
-      __ Ld(trg, TMP, 0);
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+                                                             trg_loc,
+                                                             base,
+                                                             /* offset */ 0U,
+                                                             /* index */ offset_loc,
+                                                             TIMES_1,
+                                                             temp,
+                                                             /* needs_null_check */ false);
+          if (is_volatile) {
+            __ Sync(0);
+          }
+        } else {
+          __ Lwu(trg, TMP, 0);
+          if (is_volatile) {
+            __ Sync(0);
+          }
+          codegen->GenerateReadBarrierSlow(invoke,
+                                           trg_loc,
+                                           trg_loc,
+                                           base_loc,
+                                           /* offset */ 0U,
+                                           /* index */ offset_loc);
+        }
+      } else {
+        __ Lwu(trg, TMP, 0);
+        if (is_volatile) {
+          __ Sync(0);
+        }
+        __ MaybeUnpoisonHeapReference(trg);
+      }
       break;
 
     default:
@@ -1202,7 +1258,7 @@
 
 // int sun.misc.Unsafe.getInt(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) {
@@ -1211,7 +1267,7 @@
 
 // int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
@@ -1220,7 +1276,7 @@
 
 // long sun.misc.Unsafe.getLong(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
@@ -1229,7 +1285,7 @@
 
 // long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
@@ -1238,7 +1294,7 @@
 
 // Object sun.misc.Unsafe.getObject(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
@@ -1247,7 +1303,7 @@
 
 // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
@@ -1264,6 +1320,8 @@
   locations->SetInAt(3, Location::RequiresRegister());
 }
 
+// Note that the caller must supply a properly aligned memory address.
+// If it is not aligned, the behavior is undefined (atomicity is not guaranteed; an exception may occur).
 static void GenUnsafePut(LocationSummary* locations,
                          Primitive::Type type,
                          bool is_volatile,
@@ -1429,35 +1487,70 @@
                codegen_);
 }
 
-static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      kUseBakerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
-
   locations->SetOut(Location::RequiresRegister());
+
+  // Temporary register used in CAS by (Baker) read barrier.
+  if (can_call) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS64* codegen) {
+// Note that the caller must supply a properly aligned memory address.
+// If it is not aligned, the behavior is undefined (atomicity is not guaranteed; an exception may occur).
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS64* codegen) {
   Mips64Assembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
   GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
-  GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>();
+  Location offset_loc = locations->InAt(2);
+  GpuRegister offset = offset_loc.AsRegister<GpuRegister>();
   GpuRegister expected = locations->InAt(3).AsRegister<GpuRegister>();
   GpuRegister value = locations->InAt(4).AsRegister<GpuRegister>();
-  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  Location out_loc = locations->Out();
+  GpuRegister out = out_loc.AsRegister<GpuRegister>();
 
   DCHECK_NE(base, out);
   DCHECK_NE(offset, out);
   DCHECK_NE(expected, out);
 
   if (type == Primitive::kPrimNot) {
-    // Mark card for object assuming new value is stored.
+    // The only read barrier implementation supporting the
+    // UnsafeCASObject intrinsic is the Baker-style read barrier.
+    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
+    // object and scan the receiver at the next GC for nothing.
     bool value_can_be_null = true;  // TODO: Worth finding out this information?
     codegen->MarkGCCard(base, value, value_can_be_null);
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      Location temp = locations->GetTemp(0);
+      // Need to make sure the reference stored in the field is a to-space
+      // one before attempting the CAS or the CAS could fail incorrectly.
+      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+          invoke,
+          out_loc,  // Not a real output; used only as a "temporary" within the read barrier.
+          base,
+          /* offset */ 0u,
+          /* index */ offset_loc,
+          ScaleFactor::TIMES_1,
+          temp,
+          /* needs_null_check */ false,
+          /* always_update_field */ true);
+    }
   }
 
   Mips64Label loop_head, exit_loop;
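
The LL/SC retry loop that uses these labels lies outside this hunk. A hedged sketch of the usual
MIPS64 pattern (the real code routes the new value through a temporary, handles reference
poisoning, and computes the boolean result after the loop):

    __ Sync(0);                          // barrier before the atomic sequence
    __ Bind(&loop_head);
    if (type == Primitive::kPrimLong) {
      __ Lld(out, TMP);                  // load-linked, 64-bit
    } else {
      __ Ll(out, TMP);                   // load-linked, 32-bit
    }
    __ Bnec(out, expected, &exit_loop);  // current value differs from expected: CAS fails
    __ Move(out, value);                 // new value to store
    if (type == Primitive::kPrimLong) {
      __ Scd(out, TMP);                  // store-conditional, 64-bit
    } else {
      __ Sc(out, TMP);                   // store-conditional, 32-bit
    }
    __ Beqzc(out, &loop_head);           // store-conditional failed: retry
    __ Bind(&exit_loop);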
@@ -1521,29 +1614,39 @@
 
 // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASInt(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASInt(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+  GenCas(invoke, Primitive::kPrimInt, codegen_);
 }
 
 // boolean sun.misc.Unsafe.compareAndSwapLong(Object o, long offset, long expected, long x)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASLong(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASLong(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_);
+  GenCas(invoke, Primitive::kPrimLong, codegen_);
 }
 
 // boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barrier.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barrier.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  GenCas(invoke, Primitive::kPrimNot, codegen_);
 }
 
 // int java.lang.String.compareTo(String anotherString)
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 3c6d2d6..eb88fde 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -454,6 +454,8 @@
 static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
   return instruction_set == kArm64
       || instruction_set == kThumb2
+      || instruction_set == kMips
+      || instruction_set == kMips64
       || instruction_set == kX86
       || instruction_set == kX86_64;
 }
diff --git a/compiler/utils/atomic_method_ref_map-inl.h b/compiler/utils/atomic_method_ref_map-inl.h
index d71c2fe..ad3a099 100644
--- a/compiler/utils/atomic_method_ref_map-inl.h
+++ b/compiler/utils/atomic_method_ref_map-inl.h
@@ -42,7 +42,7 @@
 inline bool AtomicMethodRefMap<T>::Get(MethodReference ref, T* out) const {
   const ElementArray* const array = GetArray(ref.dex_file);
   if (array == nullptr) {
-    return kInsertResultInvalidDexFile;
+    return false;
   }
   *out = (*array)[ref.dex_method_index].LoadRelaxed();
   return true;
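
The removed line compiled only because the enumerator converts implicitly to bool. Assuming
kInsertResultInvalidDexFile is the first enumerator (value 0), the old code returned false by
accident of enum ordering rather than by intent:

    enum InsertResult { kInsertResultInvalidDexFile /* = 0 */, /* ... */ };
    bool Get() { return kInsertResultInvalidDexFile; }  // compiles; yields false only because the value is 0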
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 8a5ae75..7d6a7f8 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1661,6 +1661,7 @@
   }
 }
 
+// TODO: don't use rtmp, use daui, dahi, dati.
 void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) {
   if (IsInt<16>(value)) {
     Daddiu(rt, rs, value);
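
The TODO refers to the MIPS64R6 immediate-add instructions, which can build the 64-bit constant
in place without a scratch register. A hypothetical rtmp-free shape (the adjustments needed
because each 16-bit chunk is sign-extended are omitted):

    // daddiu rt, rs, value[15:0]    -- add bits 0..15 (sign-extended immediate)
    // daui   rt, rt, value[31:16]   -- add bits 16..31 (immediate << 16)
    // dahi   rt, value[47:32]       -- add bits 32..47 (immediate << 32)
    // dati   rt, value[63:48]       -- add bits 48..63 (immediate << 48)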
diff --git a/compiler/utils/mips64/managed_register_mips64.cc b/compiler/utils/mips64/managed_register_mips64.cc
index dea396e..42d061e 100644
--- a/compiler/utils/mips64/managed_register_mips64.cc
+++ b/compiler/utils/mips64/managed_register_mips64.cc
@@ -26,6 +26,11 @@
   CHECK(IsValidManagedRegister());
   CHECK(other.IsValidManagedRegister());
   if (Equals(other)) return true;
+  if (IsFpuRegister() && other.IsVectorRegister()) {
+    return (AsFpuRegister() == other.AsOverlappingFpuRegister());
+  } else if (IsVectorRegister() && other.IsFpuRegister()) {
+    return (AsVectorRegister() == other.AsOverlappingVectorRegister());
+  }
   return false;
 }
 
@@ -36,6 +41,8 @@
     os << "GPU: " << static_cast<int>(AsGpuRegister());
   } else if (IsFpuRegister()) {
      os << "FpuRegister: " << static_cast<int>(AsFpuRegister());
+  } else if (IsVectorRegister()) {
+     os << "VectorRegister: " << static_cast<int>(AsVectorRegister());
   } else {
     os << "??: " << RegId();
   }
diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h
index c9f9556..3980199 100644
--- a/compiler/utils/mips64/managed_register_mips64.h
+++ b/compiler/utils/mips64/managed_register_mips64.h
@@ -30,11 +30,27 @@
 const int kNumberOfFpuRegIds = kNumberOfFpuRegisters;
 const int kNumberOfFpuAllocIds = kNumberOfFpuRegisters;
 
-const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds;
-const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds;
+const int kNumberOfVecRegIds = kNumberOfVectorRegisters;
+const int kNumberOfVecAllocIds = kNumberOfVectorRegisters;
 
-// An instance of class 'ManagedRegister' represents a single GPU register (enum
-// Register) or a double precision FP register (enum FpuRegister)
+const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds + kNumberOfVecRegIds;
+const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds + kNumberOfVecAllocIds;
+
+// Register ids map:
+//   [0..R[  core registers (enum GpuRegister)
+//   [R..F[  floating-point registers (enum FpuRegister)
+//   [F..W[  MSA vector registers (enum VectorRegister)
+// where
+//   R = kNumberOfGpuRegIds
+//   F = R + kNumberOfFpuRegIds
+//   W = F + kNumberOfVecRegIds
+
+// An instance of class 'ManagedRegister' represents a single Mips64 register.
+// A register can be one of the following:
+//  * core register (enum GpuRegister)
+//  * floating-point register (enum FpuRegister)
+//  * MSA vector register (enum VectorRegister)
+//
 // 'ManagedRegister::NoRegister()' provides an invalid register.
 // There is a one-to-one mapping between ManagedRegister and register id.
 class Mips64ManagedRegister : public ManagedRegister {
@@ -49,6 +65,21 @@
     return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds);
   }
 
+  constexpr VectorRegister AsVectorRegister() const {
+    CHECK(IsVectorRegister());
+    return static_cast<VectorRegister>(id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegisters));
+  }
+
+  constexpr FpuRegister AsOverlappingFpuRegister() const {
+    CHECK(IsValidManagedRegister());
+    return static_cast<FpuRegister>(AsVectorRegister());
+  }
+
+  constexpr VectorRegister AsOverlappingVectorRegister() const {
+    CHECK(IsValidManagedRegister());
+    return static_cast<VectorRegister>(AsFpuRegister());
+  }
+
   constexpr bool IsGpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfGpuRegIds);
@@ -60,6 +91,12 @@
     return (0 <= test) && (test < kNumberOfFpuRegIds);
   }
 
+  constexpr bool IsVectorRegister() const {
+    CHECK(IsValidManagedRegister());
+    const int test = id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegIds);
+    return (0 <= test) && (test < kNumberOfVecRegIds);
+  }
+
   void Print(std::ostream& os) const;
 
   // Returns true if the two managed-registers ('this' and 'other') overlap.
@@ -77,6 +114,11 @@
     return FromRegId(r + kNumberOfGpuRegIds);
   }
 
+  static constexpr Mips64ManagedRegister FromVectorRegister(VectorRegister r) {
+    CHECK_NE(r, kNoVectorRegister);
+    return FromRegId(r + kNumberOfGpuRegIds + kNumberOfFpuRegIds);
+  }
+
  private:
   constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
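
A worked example of the register id map above, assuming the usual 32 registers in each file
(so R = 32, F = 64, W = 96):

    Mips64ManagedRegister f5 = Mips64ManagedRegister::FromFpuRegister(F5);     // id 32 + 5 = 37
    Mips64ManagedRegister w5 = Mips64ManagedRegister::FromVectorRegister(W5);  // id 64 + 5 = 69
    CHECK(f5.Overlaps(w5));                          // F5 is the low 64 bits of MSA register W5
    CHECK_EQ(W5, f5.AsOverlappingVectorRegister());  // 37 - 32 = 5 -> W5
    CHECK_EQ(F5, w5.AsOverlappingFpuRegister());     // 69 - 64 = 5 -> F5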
diff --git a/compiler/utils/mips64/managed_register_mips64_test.cc b/compiler/utils/mips64/managed_register_mips64_test.cc
new file mode 100644
index 0000000..8b72d7e
--- /dev/null
+++ b/compiler/utils/mips64/managed_register_mips64_test.cc
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "managed_register_mips64.h"
+#include "globals.h"
+#include "gtest/gtest.h"
+
+namespace art {
+namespace mips64 {
+
+TEST(Mips64ManagedRegister, NoRegister) {
+  Mips64ManagedRegister reg = ManagedRegister::NoRegister().AsMips64();
+  EXPECT_TRUE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.Overlaps(reg));
+}
+
+TEST(Mips64ManagedRegister, GpuRegister) {
+  Mips64ManagedRegister reg = Mips64ManagedRegister::FromGpuRegister(ZERO);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(ZERO, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(AT);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(AT, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(V0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(V0, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(A0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(A0, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(A7);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(A7, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(T0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(T0, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(T3);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(T3, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(S0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(S0, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(GP);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(GP, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(SP);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(SP, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(RA);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(RA, reg.AsGpuRegister());
+}
+
+TEST(Mips64ManagedRegister, FpuRegister) {
+  Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0);
+  Mips64ManagedRegister vreg = Mips64ManagedRegister::FromVectorRegister(W0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_TRUE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(vreg));
+  EXPECT_EQ(F0, reg.AsFpuRegister());
+  EXPECT_EQ(W0, reg.AsOverlappingVectorRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F1);
+  vreg = Mips64ManagedRegister::FromVectorRegister(W1);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_TRUE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(vreg));
+  EXPECT_EQ(F1, reg.AsFpuRegister());
+  EXPECT_EQ(W1, reg.AsOverlappingVectorRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F1)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F20);
+  vreg = Mips64ManagedRegister::FromVectorRegister(W20);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_TRUE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(vreg));
+  EXPECT_EQ(F20, reg.AsFpuRegister());
+  EXPECT_EQ(W20, reg.AsOverlappingVectorRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F20)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F31);
+  vreg = Mips64ManagedRegister::FromVectorRegister(W31);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_TRUE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(vreg));
+  EXPECT_EQ(F31, reg.AsFpuRegister());
+  EXPECT_EQ(W31, reg.AsOverlappingVectorRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F31)));
+}
+
+TEST(Mips64ManagedRegister, VectorRegister) {
+  Mips64ManagedRegister reg = Mips64ManagedRegister::FromVectorRegister(W0);
+  Mips64ManagedRegister freg = Mips64ManagedRegister::FromFpuRegister(F0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_TRUE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(freg));
+  EXPECT_EQ(W0, reg.AsVectorRegister());
+  EXPECT_EQ(F0, reg.AsOverlappingFpuRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W2);
+  freg = Mips64ManagedRegister::FromFpuRegister(F2);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_TRUE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(freg));
+  EXPECT_EQ(W2, reg.AsVectorRegister());
+  EXPECT_EQ(F2, reg.AsOverlappingFpuRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W2)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W13);
+  freg = Mips64ManagedRegister::FromFpuRegister(F13);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_TRUE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(freg));
+  EXPECT_EQ(W13, reg.AsVectorRegister());
+  EXPECT_EQ(F13, reg.AsOverlappingFpuRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W13)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W29);
+  freg = Mips64ManagedRegister::FromFpuRegister(F29);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_TRUE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(freg));
+  EXPECT_EQ(W29, reg.AsVectorRegister());
+  EXPECT_EQ(F29, reg.AsOverlappingFpuRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W29)));
+}
+
+TEST(Mips64ManagedRegister, Equals) {
+  ManagedRegister no_reg = ManagedRegister::NoRegister();
+  EXPECT_TRUE(no_reg.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+  EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  Mips64ManagedRegister reg_ZERO = Mips64ManagedRegister::FromGpuRegister(ZERO);
+  EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_TRUE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+  EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  Mips64ManagedRegister reg_A1 = Mips64ManagedRegister::FromGpuRegister(A1);
+  EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_TRUE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+  EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  Mips64ManagedRegister reg_S2 = Mips64ManagedRegister::FromGpuRegister(S2);
+  EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S1)));
+  EXPECT_TRUE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+  EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  Mips64ManagedRegister reg_F0 = Mips64ManagedRegister::FromFpuRegister(F0);
+  EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+  EXPECT_TRUE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F1)));
+  EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  Mips64ManagedRegister reg_F31 = Mips64ManagedRegister::FromFpuRegister(F31);
+  EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+  EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F1)));
+  EXPECT_TRUE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  Mips64ManagedRegister reg_W0 = Mips64ManagedRegister::FromVectorRegister(W0);
+  EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(S1)));
+  EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_TRUE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W1)));
+  EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  Mips64ManagedRegister reg_W31 = Mips64ManagedRegister::FromVectorRegister(W31);
+  EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(S1)));
+  EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W1)));
+  EXPECT_TRUE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W31)));
+}
+
+TEST(Mips64ManagedRegister, Overlaps) {
+  Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0);
+  Mips64ManagedRegister reg_o = Mips64ManagedRegister::FromVectorRegister(W0);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(F0, reg_o.AsOverlappingFpuRegister());
+  EXPECT_EQ(W0, reg.AsOverlappingVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F4);
+  reg_o = Mips64ManagedRegister::FromVectorRegister(W4);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(F4, reg_o.AsOverlappingFpuRegister());
+  EXPECT_EQ(W4, reg.AsOverlappingVectorRegister());
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F16);
+  reg_o = Mips64ManagedRegister::FromVectorRegister(W16);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(F16, reg_o.AsOverlappingFpuRegister());
+  EXPECT_EQ(W16, reg.AsOverlappingVectorRegister());
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F31);
+  reg_o = Mips64ManagedRegister::FromVectorRegister(W31);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(F31, reg_o.AsOverlappingFpuRegister());
+  EXPECT_EQ(W31, reg.AsOverlappingVectorRegister());
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W0);
+  reg_o = Mips64ManagedRegister::FromFpuRegister(F0);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(W0, reg_o.AsOverlappingVectorRegister());
+  EXPECT_EQ(F0, reg.AsOverlappingFpuRegister());
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W4);
+  reg_o = Mips64ManagedRegister::FromFpuRegister(F4);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(W4, reg_o.AsOverlappingVectorRegister());
+  EXPECT_EQ(F4, reg.AsOverlappingFpuRegister());
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W16);
+  reg_o = Mips64ManagedRegister::FromFpuRegister(F16);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(W16, reg_o.AsOverlappingVectorRegister());
+  EXPECT_EQ(F16, reg.AsOverlappingFpuRegister());
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W31);
+  reg_o = Mips64ManagedRegister::FromFpuRegister(F31);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(W31, reg_o.AsOverlappingVectorRegister());
+  EXPECT_EQ(F31, reg.AsOverlappingFpuRegister());
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromGpuRegister(ZERO);
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromGpuRegister(A0);
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromGpuRegister(S0);
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromGpuRegister(RA);
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+}
+
+}  // namespace mips64
+}  // namespace art
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index 105610e..0536f322 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -1528,6 +1528,111 @@
   return new_class_data_order;
 }
 
+void DexLayout::LayoutStringData(const DexFile* dex_file) {
+  const size_t num_strings = header_->GetCollections().StringIds().size();
+  std::vector<bool> is_shorty(num_strings, false);
+  std::vector<bool> from_hot_method(num_strings, false);
+  for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
+    // The name of a profile class is likely to be looked up by ClassTable::Lookup, so mark it
+    // as hot.
+    const bool is_profile_class =
+        info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
+    if (is_profile_class) {
+      from_hot_method[class_def->ClassType()->GetStringId()->GetIndex()] = true;
+    }
+    dex_ir::ClassData* data = class_def->GetClassData();
+    if (data == nullptr) {
+      continue;
+    }
+    for (size_t i = 0; i < 2; ++i) {
+      for (auto& method : *(i == 0 ? data->DirectMethods() : data->VirtualMethods())) {
+        const dex_ir::MethodId* method_id = method->GetMethodId();
+        dex_ir::CodeItem* code_item = method->GetCodeItem();
+        if (code_item == nullptr) {
+          continue;
+        }
+        const bool is_clinit = is_profile_class &&
+            (method->GetAccessFlags() & kAccConstructor) != 0 &&
+            (method->GetAccessFlags() & kAccStatic) != 0;
+        const bool method_executed = is_clinit ||
+            info_->ContainsMethod(MethodReference(dex_file, method_id->GetIndex()));
+        if (!method_executed) {
+          continue;
+        }
+        is_shorty[method_id->Proto()->Shorty()->GetIndex()] = true;
+        dex_ir::CodeFixups* fixups = code_item->GetCodeFixups();
+        if (fixups == nullptr) {
+          continue;
+        }
+        if (fixups->StringIds() != nullptr) {
+          // Add const-strings.
+          for (dex_ir::StringId* id : *fixups->StringIds()) {
+            from_hot_method[id->GetIndex()] = true;
+          }
+        }
+        // TODO: Only visit field ids from static getters and setters.
+        for (dex_ir::FieldId* id : *fixups->FieldIds()) {
+          // Add the field names and types from getters and setters.
+          from_hot_method[id->Name()->GetIndex()] = true;
+          from_hot_method[id->Type()->GetStringId()->GetIndex()] = true;
+        }
+      }
+    }
+  }
+  // Sort string data: cold strings first, hot strings last; shorties last within each group.
+  std::vector<dex_ir::StringId*> string_ids;
+  size_t min_offset = std::numeric_limits<size_t>::max();
+  size_t max_offset = 0;
+  size_t hot_bytes = 0;
+  for (auto& string_id : header_->GetCollections().StringIds()) {
+    string_ids.push_back(string_id.get());
+    const size_t cur_offset = string_id->DataItem()->GetOffset();
+    CHECK_NE(cur_offset, 0u);
+    min_offset = std::min(min_offset, cur_offset);
+    dex_ir::StringData* data = string_id->DataItem();
+    const size_t element_size = data->GetSize() + 1;  // Add one extra for null.
+    size_t end_offset = cur_offset + element_size;
+    if (is_shorty[string_id->GetIndex()] || from_hot_method[string_id->GetIndex()]) {
+      hot_bytes += element_size;
+    }
+    max_offset = std::max(max_offset, end_offset);
+  }
+  VLOG(compiler) << "Hot string data bytes " << hot_bytes << "/" << max_offset - min_offset;
+  std::sort(string_ids.begin(),
+            string_ids.end(),
+            [&is_shorty, &from_hot_method](const dex_ir::StringId* a,
+                                           const dex_ir::StringId* b) {
+    const bool a_is_hot = from_hot_method[a->GetIndex()];
+    const bool b_is_hot = from_hot_method[b->GetIndex()];
+    if (a_is_hot != b_is_hot) {
+      return a_is_hot < b_is_hot;
+    }
+    // After strings from hot methods are partitioned, subpartition shorties.
+    const bool a_is_shorty = is_shorty[a->GetIndex()];
+    const bool b_is_shorty = is_shorty[b->GetIndex()];
+    if (a_is_shorty != b_is_shorty) {
+      return a_is_shorty < b_is_shorty;
+    }
+    // Preserve order.
+    return a->DataItem()->GetOffset() < b->DataItem()->GetOffset();
+  });
+  // Now that we know the desired order of the string data, reassign the offsets.
+  size_t offset = min_offset;
+  for (dex_ir::StringId* string_id : string_ids) {
+    dex_ir::StringData* data = string_id->DataItem();
+    data->SetOffset(offset);
+    offset += data->GetSize() + 1;  // Add one extra for null.
+  }
+  if (offset > max_offset) {
+    const uint32_t diff = offset - max_offset;
+    // If we expanded the string data section, we need to update the offsets or else we will
+    // corrupt the next section when writing out.
+    FixupSections(header_->GetCollections().StringDatasOffset(), diff);
+    // Update file size.
+    header_->SetFileSize(header_->FileSize() + diff);
+  }
+}
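
The comparator above orders cold strings before hot ones, non-shorties before shorties within
each partition, and otherwise preserves the original offset order. A standalone illustration of
that ordering on hypothetical data:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    struct Str { int index; bool hot; bool shorty; size_t offset; };

    int main() {
      std::vector<Str> v = {{0, true, false, 0}, {1, false, true, 8}, {2, false, false, 16}};
      std::sort(v.begin(), v.end(), [](const Str& a, const Str& b) {
        if (a.hot != b.hot) return a.hot < b.hot;              // cold first, hot last
        if (a.shorty != b.shorty) return a.shorty < b.shorty;  // shorties last within a group
        return a.offset < b.offset;                            // otherwise keep offset order
      });
      // Resulting order: index 2 (cold), index 1 (cold shorty), index 0 (hot).
      return 0;
    }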
+
 // Orders code items according to specified class data ordering.
 // NOTE: If the section following the code items is byte aligned, the last code item is left in
 // place to preserve alignment. Layout needs an overhaul to handle movement of other sections.
@@ -1686,6 +1791,7 @@
 }
 
 void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
+  LayoutStringData(dex_file);
   std::vector<dex_ir::ClassData*> new_class_data_order = LayoutClassDefsAndClassData(dex_file);
   int32_t diff = LayoutCodeItems(new_class_data_order);
   // Move sections after ClassData by diff bytes.
diff --git a/dexlayout/dexlayout.h b/dexlayout/dexlayout.h
index f26b423..69117ad 100644
--- a/dexlayout/dexlayout.h
+++ b/dexlayout/dexlayout.h
@@ -109,6 +109,7 @@
 
   std::vector<dex_ir::ClassData*> LayoutClassDefsAndClassData(const DexFile* dex_file);
   int32_t LayoutCodeItems(std::vector<dex_ir::ClassData*> new_class_data_order);
+  void LayoutStringData(const DexFile* dex_file);
   bool IsNextSectionCodeItemAligned(uint32_t offset);
   template<class T> void FixupSection(std::map<uint32_t, std::unique_ptr<T>>& map, uint32_t diff);
   void FixupSections(uint32_t offset, uint32_t diff);
diff --git a/dexlayout/dexlayout_test.cc b/dexlayout/dexlayout_test.cc
index bd6548e..4ef48ff 100644
--- a/dexlayout/dexlayout_test.cc
+++ b/dexlayout/dexlayout_test.cc
@@ -43,18 +43,6 @@
 static const char kDexFileLayoutInputProfile[] =
     "cHJvADAwNAABCwABAAAAAAD1KW3+Y2xhc3Nlcy5kZXgBAA==";
 
-static const char kDexFileLayoutExpectedOutputDex[] =
-    "ZGV4CjAzNQD1KW3+B8NAB0f2A/ZVIBJ0aHrGIqcpVTAUAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAH"
-    "AAAAcAAAAAQAAACMAAAAAQAAAJwAAAAAAAAAAAAAAAMAAACoAAAAAgAAAMAAAAAUAQAAAAEAADAB"
-    "AAA4AQAAQAEAAEgBAABNAQAAUgEAAGYBAAADAAAABAAAAAUAAAAGAAAABgAAAAMAAAAAAAAAAAAA"
-    "AAAAAAABAAAAAAAAAAIAAAAAAAAAAQAAAAAAAAACAAAAAAAAAAIAAAAAAAAAdQEAAAAAAAAAAAAA"
-    "AAAAAAIAAAAAAAAAAQAAAAAAAAB/AQAAAAAAAAEAAQABAAAAbwEAAAQAAABwEAIAAAAOAAEAAQAB"
-    "AAAAaQEAAAQAAABwEAIAAAAOAAY8aW5pdD4ABkEuamF2YQAGQi5qYXZhAANMQTsAA0xCOwASTGph"
-    "dmEvbGFuZy9PYmplY3Q7AAFWAAQABw48AAQABw48AAAAAQABgIAEgAIAAAEAAICABJgCAAAACwAA"
-    "AAAAAAABAAAAAAAAAAEAAAAHAAAAcAAAAAIAAAAEAAAAjAAAAAMAAAABAAAAnAAAAAUAAAADAAAA"
-    "qAAAAAYAAAACAAAAwAAAAAEgAAACAAAAAAEAAAIgAAAHAAAAMAEAAAMgAAACAAAAaQEAAAAgAAAC"
-    "AAAAdQEAAAAQAAABAAAAjAEAAA==";
-
 // Dex file with catch handler unreferenced by try blocks.
 // Constructed by building a dex file with try/catch blocks and hex editing.
 static const char kUnreferencedCatchHandlerInputDex[] =
@@ -314,26 +302,21 @@
     WriteFileBase64(kDexFileLayoutInputDex, dex_file.c_str());
     std::string profile_file = tmp_dir + "primary.prof";
     WriteFileBase64(kDexFileLayoutInputProfile, profile_file.c_str());
-    std::string expected_output = tmp_dir + "expected.dex";
-    WriteFileBase64(kDexFileLayoutExpectedOutputDex, expected_output.c_str());
     std::string output_dex = tmp_dir + "classes.dex.new";
 
     std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
     EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
 
     std::vector<std::string> dexlayout_exec_argv =
-        { dexlayout, "-w", tmp_dir, "-o", tmp_name, "-p", profile_file, dex_file };
+        { dexlayout, "-v", "-w", tmp_dir, "-o", tmp_name, "-p", profile_file, dex_file };
     if (!::art::Exec(dexlayout_exec_argv, error_msg)) {
       return false;
     }
-    std::vector<std::string> diff_exec_argv =
-        { "/usr/bin/diff", expected_output, output_dex };
-    if (!::art::Exec(diff_exec_argv, error_msg)) {
-      return false;
-    }
+
+    // Passing -v above makes dexlayout verify the output, so a layout that corrupts the dex
+    // file fails the test.
 
     std::vector<std::string> rm_exec_argv =
-        { "/bin/rm", dex_file, profile_file, expected_output, output_dex };
+        { "/bin/rm", dex_file, profile_file, output_dex };
     if (!::art::Exec(rm_exec_argv, error_msg)) {
       return false;
     }
diff --git a/profman/profile_assistant.cc b/profman/profile_assistant.cc
index a25460e..b9a85bc 100644
--- a/profman/profile_assistant.cc
+++ b/profman/profile_assistant.cc
@@ -44,10 +44,15 @@
 
   // Merge all current profiles.
   for (size_t i = 0; i < profile_files.size(); i++) {
-    if (!info.Load(profile_files[i].GetFile()->Fd())) {
+    ProfileCompilationInfo cur_info;
+    if (!cur_info.Load(profile_files[i].GetFile()->Fd())) {
       LOG(WARNING) << "Could not load profile file at index " << i;
       return kErrorBadProfiles;
     }
+    if (!info.MergeWith(cur_info)) {
+      LOG(WARNING) << "Could not merge profile file at index " << i;
+      return kErrorBadProfiles;
+    }
   }
 
   // Check if there is enough new information added by the current profiles.
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index 1a8a614..94f6e70 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -37,14 +37,25 @@
                     uint16_t number_of_classes,
                     const ScratchFile& profile,
                     ProfileCompilationInfo* info,
-                    uint16_t start_method_index = 0) {
+                    uint16_t start_method_index = 0,
+                    bool reverse_dex_write_order = false) {
     std::string dex_location1 = "location1" + id;
     uint32_t dex_location_checksum1 = checksum;
     std::string dex_location2 = "location2" + id;
     uint32_t dex_location_checksum2 = 10 * checksum;
     for (uint16_t i = start_method_index; i < start_method_index + number_of_methods; i++) {
-      ASSERT_TRUE(info->AddMethodIndex(dex_location1, dex_location_checksum1, i));
-      ASSERT_TRUE(info->AddMethodIndex(dex_location2, dex_location_checksum2, i));
+      // reverse_dex_write_order controls the order in which the dex files will be added to
+      // the profile and thus written to disk.
+      ProfileCompilationInfo::OfflineProfileMethodInfo pmi =
+          GetOfflineProfileMethodInfo(dex_location1, dex_location_checksum1,
+                                      dex_location2, dex_location_checksum2);
+      if (reverse_dex_write_order) {
+        ASSERT_TRUE(info->AddMethod(dex_location2, dex_location_checksum2, i, pmi));
+        ASSERT_TRUE(info->AddMethod(dex_location1, dex_location_checksum1, i, pmi));
+      } else {
+        ASSERT_TRUE(info->AddMethod(dex_location1, dex_location_checksum1, i, pmi));
+        ASSERT_TRUE(info->AddMethod(dex_location2, dex_location_checksum2, i, pmi));
+      }
     }
     for (uint16_t i = 0; i < number_of_classes; i++) {
       ASSERT_TRUE(info->AddClassIndex(dex_location1, dex_location_checksum1, dex::TypeIndex(i)));
@@ -55,6 +66,43 @@
     ASSERT_TRUE(profile.GetFile()->ResetOffset());
   }
 
+  ProfileCompilationInfo::OfflineProfileMethodInfo GetOfflineProfileMethodInfo(
+        const std::string& dex_location1, uint32_t dex_checksum1,
+        const std::string& dex_location2, uint32_t dex_checksum2) {
+    ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
+    pmi.dex_references.emplace_back(dex_location1, dex_checksum1);
+    pmi.dex_references.emplace_back(dex_location2, dex_checksum2);
+
+    // Monomorphic
+    for (uint16_t dex_pc = 0; dex_pc < 11; dex_pc++) {
+      ProfileCompilationInfo::DexPcData dex_pc_data;
+      dex_pc_data.AddClass(0, dex::TypeIndex(0));
+      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+    }
+    // Polymorphic
+    for (uint16_t dex_pc = 11; dex_pc < 22; dex_pc++) {
+      ProfileCompilationInfo::DexPcData dex_pc_data;
+      dex_pc_data.AddClass(0, dex::TypeIndex(0));
+      dex_pc_data.AddClass(1, dex::TypeIndex(1));
+
+      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+    }
+    // Megamorphic
+    for (uint16_t dex_pc = 22; dex_pc < 33; dex_pc++) {
+      ProfileCompilationInfo::DexPcData dex_pc_data;
+      dex_pc_data.SetIsMegamorphic();
+      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+    }
+    // Missing types
+    for (uint16_t dex_pc = 33; dex_pc < 44; dex_pc++) {
+      ProfileCompilationInfo::DexPcData dex_pc_data;
+      dex_pc_data.SetIsMissingTypes();
+      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+    }
+
+    return pmi;
+  }
+
   int GetFd(const ScratchFile& file) const {
     return static_cast<int>(file.GetFd());
   }
@@ -99,6 +147,18 @@
     return ExecAndReturnCode(argv_str, &error);
   }
 
+  bool GenerateTestProfileWithInputDex(const std::string& filename) {
+    std::string profman_cmd = GetProfmanCmd();
+    std::vector<std::string> argv_str;
+    argv_str.push_back(profman_cmd);
+    argv_str.push_back("--generate-test-profile=" + filename);
+    argv_str.push_back("--generate-test-profile-seed=0");
+    argv_str.push_back("--apk=" + GetLibCoreDexFileNames()[0]);
+    argv_str.push_back("--dex-location=" + GetLibCoreDexFileNames()[0]);
+    std::string error;
+    return ExecAndReturnCode(argv_str, &error);
+  }
+
   bool CreateProfile(std::string profile_file_contents,
                      const std::string& filename,
                      const std::string& dex_location) {
@@ -425,6 +485,17 @@
   ASSERT_TRUE(info.Load(GetFd(profile)));
 }
 
+TEST_F(ProfileAssistantTest, TestProfileGenerationWithInputDex) {
+  ScratchFile profile;
+  // Generate a test profile passing in a dex file as reference.
+  GenerateTestProfileWithInputDex(profile.GetFilename());
+
+  // Verify that the generated profile is valid and can be loaded.
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ProfileCompilationInfo info;
+  ASSERT_TRUE(info.Load(GetFd(profile)));
+}
+
 TEST_F(ProfileAssistantTest, TestProfileCreationAllMatch) {
   // Class names put here need to be in sorted order.
   std::vector<std::string> class_names = {
@@ -629,4 +700,44 @@
   }
 }
 
+TEST_F(ProfileAssistantTest, MergeProfilesWithDifferentDexOrder) {
+  ScratchFile profile1;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({GetFd(profile1)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  // The new profile info will contain the methods with indices 0-99.
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, 0, profile1, &info1,
+      /*start_method_index*/0, /*reverse_dex_write_order*/false);
+
+  // The reference profile info will contain the methods with indices 50-149.
+  // When setting up the profile reverse the order in which the dex files
+  // are added to the profile. This will verify that profman merges profiles
+  // with a different dex order correctly.
+  const uint16_t kNumberOfMethodsAlreadyCompiled = 100;
+  ProfileCompilationInfo reference_info;
+  SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, 0, reference_profile,
+      &reference_info, kNumberOfMethodsToEnableCompilation / 2, /*reverse_dex_write_order*/true);
+
+  // We should advise compilation.
+  ASSERT_EQ(ProfileAssistant::kCompile,
+            ProcessProfiles(profile_fds, reference_profile_fd));
+
+  // The resulting compilation info must be equal to the merge of the inputs.
+  ProfileCompilationInfo result;
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(result.Load(reference_profile_fd));
+
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.MergeWith(reference_info));
+  ASSERT_TRUE(expected.MergeWith(info1));
+  ASSERT_TRUE(expected.Equals(result));
+
+  // The information in profile1 must remain unchanged.
+  CheckProfileInfo(profile1, info1);
+}
+
 }  // namespace art
diff --git a/profman/profman.cc b/profman/profman.cc
index dac95b8..5504695 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -117,6 +117,8 @@
   UsageError("      number of methods that should be generated. Defaults to 5.");
   UsageError("  --generate-test-profile-class-ratio=<number>: the percentage from the maximum");
   UsageError("      number of classes that should be generated. Defaults to 5.");
+  UsageError("  --generate-test-profile-seed=<number>: seed for random number generator used when");
+  UsageError("      generating random test profiles. Defaults to using NanoTime.");
   UsageError("");
   UsageError("  --create-profile-from=<filename>: creates a profile from a list of classes.");
   UsageError("");
@@ -156,6 +158,7 @@
       test_profile_num_dex_(kDefaultTestProfileNumDex),
       test_profile_method_ratio_(kDefaultTestProfileMethodRatio),
       test_profile_class_ratio_(kDefaultTestProfileClassRatio),
+      test_profile_seed_(NanoTime()),
       start_ns_(NanoTime()) {}
 
   ~ProfMan() {
@@ -221,6 +224,8 @@
                         "--generate-test-profile-class-ratio",
                         &test_profile_class_ratio_,
                         Usage);
+      } else if (option.starts_with("--generate-test-profile-seed=")) {
+        ParseUintOption(option, "--generate-test-profile-seed", &test_profile_seed_, Usage);
       } else {
         Usage("Unknown argument '%s'", option.data());
       }
@@ -798,17 +803,39 @@
     if (test_profile_class_ratio_ > 100) {
       Usage("Invalid ratio for --generate-test-profile-class-ratio");
     }
+    // If given APK files or DEX locations, check that they're ok.
+    if (!apk_files_.empty() || !apks_fd_.empty() || !dex_locations_.empty()) {
+      if (apk_files_.empty() && apks_fd_.empty()) {
+        Usage("APK files must be specified when passing DEX locations to --generate-test-profile");
+      }
+      if (dex_locations_.empty()) {
+        Usage("DEX locations must be specified when passing APK files to --generate-test-profile");
+      }
+    }
     // ShouldGenerateTestProfile confirms !test_profile_.empty().
     int profile_test_fd = open(test_profile_.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
     if (profile_test_fd < 0) {
       LOG(ERROR) << "Cannot open " << test_profile_ << strerror(errno);
       return -1;
     }
-
-    bool result = ProfileCompilationInfo::GenerateTestProfile(profile_test_fd,
-                                                              test_profile_num_dex_,
-                                                              test_profile_method_ratio_,
-                                                              test_profile_class_ratio_);
+    bool result;
+    if (apk_files_.empty() && apks_fd_.empty() && dex_locations_.empty()) {
+      result = ProfileCompilationInfo::GenerateTestProfile(profile_test_fd,
+                                                           test_profile_num_dex_,
+                                                           test_profile_method_ratio_,
+                                                           test_profile_class_ratio_,
+                                                           test_profile_seed_);
+    } else {
+      // Initialize MemMap for ZipArchive::OpenFromFd.
+      MemMap::Init();
+      // Open the dex files to look up classes and methods.
+      std::vector<std::unique_ptr<const DexFile>> dex_files;
+      OpenApkFilesFromLocations(&dex_files);
+      // Create a random profile file based on the set of dex files.
+      result = ProfileCompilationInfo::GenerateTestProfile(profile_test_fd,
+                                                           dex_files,
+                                                           test_profile_seed_);
+    }
     close(profile_test_fd);  // ignore close result.
     return result ? 0 : -1;
   }
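
Taken together, the branches above give --generate-test-profile two modes: a fully random profile, and one drawn from real dex files. A hedged sketch of the corresponding invocations (the --generate-test-profile* flags are the ones defined in this change; the --apk/--dex-location spellings are assumptions inferred from the apk_files_/dex_locations_ members and may differ):

    # Fully random profile, reproducible thanks to the new seed flag.
    profman --generate-test-profile=/tmp/test.prof --generate-test-profile-seed=42

    # Random profile restricted to the classes/methods of real dex files.
    profman --generate-test-profile=/tmp/test.prof --generate-test-profile-seed=42 \
            --apk=/path/to/base.apk --dex-location=base.apk
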
@@ -857,6 +884,7 @@
   uint16_t test_profile_num_dex_;
   uint16_t test_profile_method_ratio_;
   uint16_t test_profile_class_ratio_;
+  uint32_t test_profile_seed_;
   uint64_t start_ns_;
 };
 
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 36f9ea7..2349620 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -32,6 +32,33 @@
 // Cast entrypoints.
 extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class);
 
+// Read barrier entrypoints.
+// art_quick_read_barrier_mark_regXX uses a non-standard calling
+// convention: it expects its input in register XX+1 and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg04(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg08(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg09(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg13(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg14(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg17(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg18(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg19(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg20(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg21(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg22(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg29(mirror::Object*);
+
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
 extern int32_t CmplDouble(double a, double b);
@@ -59,9 +86,71 @@
 extern "C" int64_t __divdi3(int64_t, int64_t);
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
-// No read barrier entrypoints for marking registers.
-void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints ATTRIBUTE_UNUSED,
-                                  bool is_marking ATTRIBUTE_UNUSED) {}
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
+  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg01),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg02),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg03),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg04),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg05),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg06),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg07),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg08),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg09),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg10),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg11),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg12),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg13),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg14),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg17),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg18),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg19),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg20),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg21),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg22),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg29),
+                "Non-direct C stub marked direct.");
+}
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // Note: MIPS has asserts checking for the type of entrypoint. Don't move it
@@ -287,77 +376,19 @@
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   static_assert(IsDirectEntrypoint(kQuickReadBarrierJni), "Direct C stub not marked direct.");
-  // Read barriers (and these entry points in particular) are not
-  // supported in the compiler on MIPS32.
+  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
+  // Cannot use the following registers to pass arguments:
+  // 0(ZERO), 1(AT), 16(S0), 17(S1), 24(T8), 25(T9), 26(K0), 27(K1), 28(GP), 29(SP), 31(RA).
+  // Note that there are only 30 entry points: 00 for register 1(AT), ..., 29 for register 30(S8).
   qpoints->pReadBarrierMarkReg00 = nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg00),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg01 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg01),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg02 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg02),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg03 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg03),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg04 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg04),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg05 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg05),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg06 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg06),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg07 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg07),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg08 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg08),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg09 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg09),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg10 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg10),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg11 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg11),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg12 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg12),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg13 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg13),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg14 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg14),
-                "Non-direct C stub marked direct.");
   qpoints->pReadBarrierMarkReg15 = nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg15),
                 "Non-direct C stub marked direct.");
   qpoints->pReadBarrierMarkReg16 = nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg16),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg17 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg17),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg18 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg18),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg19 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg19),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg20 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg20),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg21 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg21),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg22 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg22),
-                "Non-direct C stub marked direct.");
   qpoints->pReadBarrierMarkReg23 = nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg23),
                 "Non-direct C stub marked direct.");
@@ -376,9 +407,6 @@
   qpoints->pReadBarrierMarkReg28 = nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg28),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg29 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg29),
-                "Non-direct C stub marked direct.");
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   static_assert(IsDirectEntrypoint(kQuickReadBarrierSlow), "Direct C stub not marked direct.");
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
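
With a real UpdateReadBarrierEntrypoints on MIPS32, the runtime can now flip the per-register mark stubs as the collector enters and leaves its marking phase. A minimal sketch of the intended toggle, assuming a caller that owns the thread's QuickEntryPoints (the actual call sites are not part of this hunk, and OnMarkingPhaseChange is an illustrative name):

    // Entering the marking phase installs the stubs so compiled code calls
    // into the runtime's mark routine; leaving it restores nullptr so the
    // compiled fast path is taken instead.
    void OnMarkingPhaseChange(QuickEntryPoints* qpoints, bool entering) {
      UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ entering);
    }
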
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 5d61539..808536b 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -2198,6 +2198,151 @@
     subu   $v0, $t0, $t1  # return (this.charAt(i) - anotherString.charAt(i))
 END art_quick_string_compareto
 
+    /*
+     * Create a function `name` calling the ReadBarrier::Mark routine,
+     * getting its argument and returning its result through register
+     * `reg`, saving and restoring all caller-save registers.
+     */
+.macro READ_BARRIER_MARK_REG name, reg
+ENTRY \name
+    /* TODO: optimizations: mark bit, forwarding. */
+    addiu   $sp, $sp, -160      # includes 16 bytes of space for argument registers a0-a3
+    .cfi_adjust_cfa_offset 160
+
+    sw      $ra, 156($sp)
+    .cfi_rel_offset 31, 156
+    sw      $t8, 152($sp)
+    .cfi_rel_offset 24, 152
+    sw      $t7, 148($sp)
+    .cfi_rel_offset 15, 148
+    sw      $t6, 144($sp)
+    .cfi_rel_offset 14, 144
+    sw      $t5, 140($sp)
+    .cfi_rel_offset 13, 140
+    sw      $t4, 136($sp)
+    .cfi_rel_offset 12, 136
+    sw      $t3, 132($sp)
+    .cfi_rel_offset 11, 132
+    sw      $t2, 128($sp)
+    .cfi_rel_offset 10, 128
+    sw      $t1, 124($sp)
+    .cfi_rel_offset 9, 124
+    sw      $t0, 120($sp)
+    .cfi_rel_offset 8, 120
+    sw      $a3, 116($sp)
+    .cfi_rel_offset 7, 116
+    sw      $a2, 112($sp)
+    .cfi_rel_offset 6, 112
+    sw      $a1, 108($sp)
+    .cfi_rel_offset 5, 108
+    sw      $a0, 104($sp)
+    .cfi_rel_offset 4, 104
+    sw      $v1, 100($sp)
+    .cfi_rel_offset 3, 100
+    sw      $v0, 96($sp)
+    .cfi_rel_offset 2, 96
+
+    la      $t9, artReadBarrierMark
+
+    sdc1    $f18, 88($sp)
+    sdc1    $f16, 80($sp)
+    sdc1    $f14, 72($sp)
+    sdc1    $f12, 64($sp)
+    sdc1    $f10, 56($sp)
+    sdc1    $f8,  48($sp)
+    sdc1    $f6,  40($sp)
+    sdc1    $f4,  32($sp)
+    sdc1    $f2,  24($sp)
+
+    .ifnc \reg, $a0
+      move  $a0, \reg           # pass obj from `reg` in a0
+    .endif
+    jalr    $t9                 # v0 <- artReadBarrierMark(obj)
+    sdc1    $f0,  16($sp)       # in delay slot
+
+    lw      $ra, 156($sp)
+    .cfi_restore 31
+    lw      $t8, 152($sp)
+    .cfi_restore 24
+    lw      $t7, 148($sp)
+    .cfi_restore 15
+    lw      $t6, 144($sp)
+    .cfi_restore 14
+    lw      $t5, 140($sp)
+    .cfi_restore 13
+    lw      $t4, 136($sp)
+    .cfi_restore 12
+    lw      $t3, 132($sp)
+    .cfi_restore 11
+    lw      $t2, 128($sp)
+    .cfi_restore 10
+    lw      $t1, 124($sp)
+    .cfi_restore 9
+    lw      $t0, 120($sp)
+    .cfi_restore 8
+    lw      $a3, 116($sp)
+    .cfi_restore 7
+    lw      $a2, 112($sp)
+    .cfi_restore 6
+    lw      $a1, 108($sp)
+    .cfi_restore 5
+    lw      $a0, 104($sp)
+    .cfi_restore 4
+    lw      $v1, 100($sp)
+    .cfi_restore 3
+
+    .ifnc \reg, $v0
+      move  \reg, $v0           # `reg` <- v0
+      lw    $v0, 96($sp)
+      .cfi_restore 2
+    .endif
+
+    ldc1    $f18, 88($sp)
+    ldc1    $f16, 80($sp)
+    ldc1    $f14, 72($sp)
+    ldc1    $f12, 64($sp)
+    ldc1    $f10, 56($sp)
+    ldc1    $f8,  48($sp)
+    ldc1    $f6,  40($sp)
+    ldc1    $f4,  32($sp)
+    ldc1    $f2,  24($sp)
+    ldc1    $f0,  16($sp)
+
+    jalr    $zero, $ra
+    addiu   $sp, $sp, 160
+    .cfi_adjust_cfa_offset -160
+END \name
+.endm
+
+// Note that art_quick_read_barrier_mark_regXX corresponds to register XX+1.
+// ZERO (register 0) is reserved.
+// AT (register 1) is reserved as a temporary/scratch register.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, $v0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, $v1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, $a0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, $a1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, $a2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, $a3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, $t0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, $t1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, $t2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, $t3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, $t4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, $t5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, $t6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, $t7
+// S0 and S1 (registers 16 and 17) are reserved as the suspend check and thread registers.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, $s2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, $s3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, $s4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, $s5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, $s6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, $s7
+// T8 and T9 (registers 24 and 25) are reserved as temporary/scratch registers.
+// K0, K1, GP, SP (registers 26 - 29) are reserved.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, $s8
+// RA (register 31) is reserved.
+
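+
The regXX naming the comments describe is off by one by design: stub index XX serves machine register XX+1, and reserved registers simply get no stub. A compile-time sketch of that convention (illustrative only, not part of the patch):

    #include <cstdint>

    // Mark stub index XX handles the reference held in MIPS register XX+1.
    constexpr uint32_t MarkStubIndexToRegister(uint32_t stub_index) {
      return stub_index + 1;
    }

    static_assert(MarkStubIndexToRegister(1) == 2,   "reg01 marks $v0 ($2)");
    static_assert(MarkStubIndexToRegister(3) == 4,   "reg03 marks $a0 ($4)");
    static_assert(MarkStubIndexToRegister(29) == 30, "reg29 marks $s8 ($30)");
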
 .extern artInvokePolymorphic
 ENTRY art_quick_invoke_polymorphic
     SETUP_SAVE_REFS_AND_ARGS_FRAME
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index bc17d47..66405cb 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -32,6 +32,32 @@
 // Cast entrypoints.
 extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class);
 
+// Read barrier entrypoints.
+// art_quick_read_barrier_mark_regXX uses a non-standard calling
+// convention: it expects its input in register XX+1 and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg04(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg08(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg09(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg13(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg17(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg18(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg19(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg20(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg21(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg22(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg29(mirror::Object*);
+
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
 extern int32_t CmplDouble(double a, double b);
@@ -60,8 +86,28 @@
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
 // No read barrier entrypoints for marking registers.
-void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints ATTRIBUTE_UNUSED,
-                                  bool is_marking ATTRIBUTE_UNUSED) {}
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
+  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
+  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
+  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
+  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
+  qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr;
+  qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr;
+  qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr;
+  qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr;
+  qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr;
+  qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr;
+  qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr;
+  qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr;
+  qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr;
+}
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   DefaultInitEntryPoints(jpoints, qpoints);
@@ -103,38 +149,20 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  // Read barriers (and these entry points in particular) are not
-  // supported in the compiler on MIPS64.
+  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
+  // Cannot use the following registers to pass arguments:
+  // 0(ZERO), 1(AT), 15(T3), 16(S0), 17(S1), 24(T8), 25(T9), 26(K0), 27(K1), 28(GP), 29(SP), 31(RA).
+  // Note that there are only 30 entry points: 00 for register 1(AT), ..., 29 for register 30(S8).
   qpoints->pReadBarrierMarkReg00 = nullptr;
-  qpoints->pReadBarrierMarkReg01 = nullptr;
-  qpoints->pReadBarrierMarkReg02 = nullptr;
-  qpoints->pReadBarrierMarkReg03 = nullptr;
-  qpoints->pReadBarrierMarkReg04 = nullptr;
-  qpoints->pReadBarrierMarkReg05 = nullptr;
-  qpoints->pReadBarrierMarkReg06 = nullptr;
-  qpoints->pReadBarrierMarkReg07 = nullptr;
-  qpoints->pReadBarrierMarkReg08 = nullptr;
-  qpoints->pReadBarrierMarkReg09 = nullptr;
-  qpoints->pReadBarrierMarkReg10 = nullptr;
-  qpoints->pReadBarrierMarkReg11 = nullptr;
-  qpoints->pReadBarrierMarkReg12 = nullptr;
-  qpoints->pReadBarrierMarkReg13 = nullptr;
   qpoints->pReadBarrierMarkReg14 = nullptr;
   qpoints->pReadBarrierMarkReg15 = nullptr;
   qpoints->pReadBarrierMarkReg16 = nullptr;
-  qpoints->pReadBarrierMarkReg17 = nullptr;
-  qpoints->pReadBarrierMarkReg18 = nullptr;
-  qpoints->pReadBarrierMarkReg19 = nullptr;
-  qpoints->pReadBarrierMarkReg20 = nullptr;
-  qpoints->pReadBarrierMarkReg21 = nullptr;
-  qpoints->pReadBarrierMarkReg22 = nullptr;
   qpoints->pReadBarrierMarkReg23 = nullptr;
   qpoints->pReadBarrierMarkReg24 = nullptr;
   qpoints->pReadBarrierMarkReg25 = nullptr;
   qpoints->pReadBarrierMarkReg26 = nullptr;
   qpoints->pReadBarrierMarkReg27 = nullptr;
   qpoints->pReadBarrierMarkReg28 = nullptr;
-  qpoints->pReadBarrierMarkReg29 = nullptr;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 3ee9c4a..9c92805 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -2053,6 +2053,180 @@
 #endif
 END art_quick_indexof
 
+    /*
+     * Create a function `name` calling the ReadBarrier::Mark routine,
+     * getting its argument and returning its result through register
+     * `reg`, saving and restoring all caller-save registers.
+     */
+.macro READ_BARRIER_MARK_REG name, reg
+ENTRY \name
+    /* TODO: optimizations: mark bit, forwarding. */
+    daddiu  $sp, $sp, -320
+    .cfi_adjust_cfa_offset 320
+
+    sd      $ra, 312($sp)
+    .cfi_rel_offset 31, 312
+    sd      $t8, 304($sp)       # save t8 holding caller's gp
+    .cfi_rel_offset 24, 304
+    sd      $t3, 296($sp)
+    .cfi_rel_offset 15, 296
+    sd      $t2, 288($sp)
+    .cfi_rel_offset 14, 288
+    sd      $t1, 280($sp)
+    .cfi_rel_offset 13, 280
+    sd      $t0, 272($sp)
+    .cfi_rel_offset 12, 272
+    sd      $a7, 264($sp)
+    .cfi_rel_offset 11, 264
+    sd      $a6, 256($sp)
+    .cfi_rel_offset 10, 256
+    sd      $a5, 248($sp)
+    .cfi_rel_offset 9, 248
+    sd      $a4, 240($sp)
+    .cfi_rel_offset 8, 240
+    sd      $a3, 232($sp)
+    .cfi_rel_offset 7, 232
+    sd      $a2, 224($sp)
+    .cfi_rel_offset 6, 224
+    sd      $a1, 216($sp)
+    .cfi_rel_offset 5, 216
+    sd      $a0, 208($sp)
+    .cfi_rel_offset 4, 208
+    sd      $v1, 200($sp)
+    .cfi_rel_offset 3, 200
+    sd      $v0, 192($sp)
+    .cfi_rel_offset 2, 192
+
+    dla     $t9, artReadBarrierMark
+
+    sdc1    $f23, 184($sp)
+    sdc1    $f22, 176($sp)
+    sdc1    $f21, 168($sp)
+    sdc1    $f20, 160($sp)
+    sdc1    $f19, 152($sp)
+    sdc1    $f18, 144($sp)
+    sdc1    $f17, 136($sp)
+    sdc1    $f16, 128($sp)
+    sdc1    $f15, 120($sp)
+    sdc1    $f14, 112($sp)
+    sdc1    $f13, 104($sp)
+    sdc1    $f12,  96($sp)
+    sdc1    $f11,  88($sp)
+    sdc1    $f10,  80($sp)
+    sdc1    $f9,   72($sp)
+    sdc1    $f8,   64($sp)
+    sdc1    $f7,   56($sp)
+    sdc1    $f6,   48($sp)
+    sdc1    $f5,   40($sp)
+    sdc1    $f4,   32($sp)
+    sdc1    $f3,   24($sp)
+    sdc1    $f2,   16($sp)
+    sdc1    $f1,    8($sp)
+
+    .ifnc \reg, $a0
+      move  $a0, \reg           # pass obj from `reg` in a0
+    .endif
+    jalr    $t9                 # v0 <- artReadBarrierMark(obj)
+    sdc1    $f0,    0($sp)      # in delay slot
+
+    ld      $ra, 312($sp)
+    .cfi_restore 31
+    ld      $t8, 304($sp)       # restore t8 holding caller's gp
+    .cfi_restore 24
+    ld      $t3, 296($sp)
+    .cfi_restore 15
+    ld      $t2, 288($sp)
+    .cfi_restore 14
+    ld      $t1, 280($sp)
+    .cfi_restore 13
+    ld      $t0, 272($sp)
+    .cfi_restore 12
+    ld      $a7, 264($sp)
+    .cfi_restore 11
+    ld      $a6, 256($sp)
+    .cfi_restore 10
+    ld      $a5, 248($sp)
+    .cfi_restore 9
+    ld      $a4, 240($sp)
+    .cfi_restore 8
+    ld      $a3, 232($sp)
+    .cfi_restore 7
+    ld      $a2, 224($sp)
+    .cfi_restore 6
+    ld      $a1, 216($sp)
+    .cfi_restore 5
+    ld      $a0, 208($sp)
+    .cfi_restore 4
+    ld      $v1, 200($sp)
+    .cfi_restore 3
+
+    .ifnc \reg, $v0
+      move  \reg, $v0           # `reg` <- v0
+      ld    $v0, 192($sp)
+      .cfi_restore 2
+    .endif
+
+    ldc1    $f23, 184($sp)
+    ldc1    $f22, 176($sp)
+    ldc1    $f21, 168($sp)
+    ldc1    $f20, 160($sp)
+    ldc1    $f19, 152($sp)
+    ldc1    $f18, 144($sp)
+    ldc1    $f17, 136($sp)
+    ldc1    $f16, 128($sp)
+    ldc1    $f15, 120($sp)
+    ldc1    $f14, 112($sp)
+    ldc1    $f13, 104($sp)
+    ldc1    $f12,  96($sp)
+    ldc1    $f11,  88($sp)
+    ldc1    $f10,  80($sp)
+    ldc1    $f9,   72($sp)
+    ldc1    $f8,   64($sp)
+    ldc1    $f7,   56($sp)
+    ldc1    $f6,   48($sp)
+    ldc1    $f5,   40($sp)
+    ldc1    $f4,   32($sp)
+    ldc1    $f3,   24($sp)
+    ldc1    $f2,   16($sp)
+    ldc1    $f1,    8($sp)
+    ldc1    $f0,    0($sp)
+
+    .cpreturn                   # restore caller's gp from t8
+    jalr    $zero, $ra
+    daddiu  $sp, $sp, 320
+    .cfi_adjust_cfa_offset -320
+END \name
+.endm
+
+// Note that art_quick_read_barrier_mark_regXX corresponds to register XX+1.
+// ZERO (register 0) is reserved.
+// AT (register 1) is reserved as a temporary/scratch register.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, $v0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, $v1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, $a0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, $a1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, $a2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, $a3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, $a4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, $a5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, $a6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, $a7
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, $t0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, $t1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, $t2
+// T3 (register 15) is reserved as a temporary/scratch register.
+// S0 and S1 (registers 16 and 17) are reserved as the suspend check and thread registers.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, $s2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, $s3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, $s4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, $s5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, $s6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, $s7
+// T8 and T9 (registers 24 and 25) are reserved as temporary/scratch registers.
+// K0, K1, GP, SP (registers 26 - 29) are reserved.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, $s8
+// RA (register 31) is reserved.
+
 .extern artInvokePolymorphic
 ENTRY art_quick_invoke_polymorphic
     SETUP_SAVE_REFS_AND_ARGS_FRAME
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index b47f8f0..5cf0e0f 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -32,6 +32,7 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array.h"
+#include "mirror/string.h"
 #include "oat.h"
 #include "obj_ptr-inl.h"
 #include "quick/quick_method_frame_info.h"
@@ -56,8 +57,10 @@
     if (!IsRuntimeMethod()) {
       CHECK(result != nullptr) << this;
       if (kCheckDeclaringClassState) {
-        CHECK(result->IsIdxLoaded() || result->IsErroneous())
-            << result->GetStatus() << " " << result->PrettyClass();
+        if (!(result->IsIdxLoaded() || result->IsErroneous())) {
+          LOG(FATAL_WITHOUT_ABORT) << "Class status: " << result->GetStatus();
+          LOG(FATAL) << result->PrettyClass();
+        }
       }
     } else {
       CHECK(result == nullptr) << this;
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 038aeb3..2414b5f 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -62,10 +62,11 @@
   kJdwpAdbStateLock,
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
+  kMarkSweepMarkStackLock,
   kRosAllocGlobalLock,
   kRosAllocBracketLock,
   kRosAllocBulkFreeLock,
-  kMarkSweepMarkStackLock,
+  kTaggingLockLevel,
   kTransactionLogLock,
   kJniFunctionTableLock,
   kJniWeakGlobalsLock,
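
The reorder matters because lock levels encode an acquisition hierarchy: a thread is expected to take a new mutex only at a level strictly below every lock it already holds, so moving kMarkSweepMarkStackLock below the RosAlloc locks lets the mark stack be locked while a RosAlloc lock is held. A hedged stand-in for that invariant (the real enforcement lives elsewhere in the mutex code; this checker is illustrative):

    #include <cassert>
    #include <vector>

    // Tracks held lock levels and asserts the ordering rule on each acquisition.
    struct LockLevelChecker {
      std::vector<int> held_levels;
      void Acquire(int level) {
        for (int held : held_levels) {
          assert(level < held && "lock level ordering violated");
        }
        held_levels.push_back(level);
      }
      void Release(int level) {
        // For the sketch, assume LIFO release.
        assert(!held_levels.empty() && held_levels.back() == level);
        held_levels.pop_back();
      }
    };
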
diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc
index a2c459c..13dbc3f 100644
--- a/runtime/jit/profile_compilation_info.cc
+++ b/runtime/jit/profile_compilation_info.cc
@@ -57,6 +57,14 @@
 static_assert(InlineCache::kIndividualCacheSize < kIsMissingTypesEncoding,
               "InlineCache::kIndividualCacheSize is larger than expected");
 
+ProfileCompilationInfo::ProfileCompilationInfo(const ProfileCompilationInfo& pci) {
+  MergeWith(pci);
+}
+
+ProfileCompilationInfo::~ProfileCompilationInfo() {
+  ClearProfile();
+}
+
 void ProfileCompilationInfo::DexPcData::AddClass(uint16_t dex_profile_idx,
                                                  const dex::TypeIndex& type_idx) {
   if (is_megamorphic || is_missing_types) {
@@ -227,28 +235,21 @@
   DCHECK_LE(info_.size(), std::numeric_limits<uint8_t>::max());
   AddUintToBuffer(&buffer, static_cast<uint8_t>(info_.size()));
 
-  // Make sure we write the dex files in order of their profile index. This
+  // Dex files must be written in the order of their profile index. This
   // avoids writing the index in the output file and simplifies the parsing logic.
-  std::vector<const std::string*> ordered_info_location(info_.size());
-  std::vector<const DexFileData*> ordered_info_data(info_.size());
-  for (const auto& it : info_) {
-    ordered_info_location[it.second.profile_index] = &(it.first);
-    ordered_info_data[it.second.profile_index] = &(it.second);
-  }
-  for (size_t i = 0; i < info_.size(); i++) {
+  for (const DexFileData* dex_data_ptr : info_) {
+    const DexFileData& dex_data = *dex_data_ptr;
     if (buffer.size() > kMaxSizeToKeepBeforeWriting) {
       if (!WriteBuffer(fd, buffer.data(), buffer.size())) {
         return false;
       }
       buffer.clear();
     }
-    const std::string& dex_location = *ordered_info_location[i];
-    const DexFileData& dex_data = *ordered_info_data[i];
 
     // Note that we allow dex files without any methods or classes, so that
     // inline caches can refer valid dex files.
 
-    if (dex_location.size() >= kMaxDexFileKeyLength) {
+    if (dex_data.profile_key.size() >= kMaxDexFileKeyLength) {
       LOG(WARNING) << "DexFileKey exceeds allocated limit";
       return false;
     }
@@ -258,19 +259,19 @@
     uint32_t methods_region_size = GetMethodsRegionSize(dex_data);
     size_t required_capacity = buffer.size() +
         kLineHeaderSize +
-        dex_location.size() +
+        dex_data.profile_key.size() +
         sizeof(uint16_t) * dex_data.class_set.size() +
         methods_region_size;
 
     buffer.reserve(required_capacity);
-    DCHECK_LE(dex_location.size(), std::numeric_limits<uint16_t>::max());
+    DCHECK_LE(dex_data.profile_key.size(), std::numeric_limits<uint16_t>::max());
     DCHECK_LE(dex_data.class_set.size(), std::numeric_limits<uint16_t>::max());
-    AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_location.size()));
+    AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.profile_key.size()));
     AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.class_set.size()));
     AddUintToBuffer(&buffer, methods_region_size);  // uint32_t
     AddUintToBuffer(&buffer, dex_data.checksum);  // uint32_t
 
-    AddStringToBuffer(&buffer, dex_location);
+    AddStringToBuffer(&buffer, dex_data.profile_key);
 
     for (const auto& method_it : dex_data.method_map) {
       AddUintToBuffer(&buffer, method_it.first);
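
For readers following the writer, the calls above amount to a fixed per-dex line header followed by the key bytes; because entries now come out of info_ in profile-index order, the index itself never needs encoding. A sketch of the layout implied by the AddUintToBuffer calls (field order mirrors write order; the struct is illustrative, not a type from the patch):

    #include <cstdint>

    struct ProfileLineHeader {        // one per dex file, in profile-index order
      uint16_t profile_key_size;      // length of the profile key string
      uint16_t class_set_size;        // number of profiled classes
      uint32_t methods_region_size;   // bytes of method/inline-cache data
      uint32_t checksum;              // dex location checksum
      // ...followed by profile_key_size bytes of the profile key
    };
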
@@ -375,23 +376,52 @@
 }
 
 ProfileCompilationInfo::DexFileData* ProfileCompilationInfo::GetOrAddDexFileData(
-    const std::string& dex_location,
+    const std::string& profile_key,
     uint32_t checksum) {
-  auto info_it = info_.FindOrAdd(dex_location, DexFileData(checksum, info_.size()));
-  if (info_.size() > std::numeric_limits<uint8_t>::max()) {
+  const auto& profile_index_it = profile_key_map_.FindOrAdd(profile_key, profile_key_map_.size());
+  if (profile_key_map_.size() > std::numeric_limits<uint8_t>::max()) {
     // Allow only 255 dex files to be profiled. This allows us to save bytes
     // when encoding. The number is well above what we expect for normal applications.
     if (kIsDebugBuild) {
-      LOG(WARNING) << "Exceeded the maximum number of dex files (255). Something went wrong";
+      LOG(ERROR) << "Exceeded the maximum number of dex files (255). Something went wrong";
     }
-    info_.erase(dex_location);
+    profile_key_map_.erase(profile_key);
     return nullptr;
   }
-  if (info_it->second.checksum != checksum) {
-    LOG(WARNING) << "Checksum mismatch for dex " << dex_location;
+
+  uint8_t profile_index = profile_index_it->second;
+  if (info_.size() <= profile_index) {
+    // This is a new addition. Add it to the info_ array.
+    info_.emplace_back(new DexFileData(profile_key, checksum, profile_index));
+  }
+  DexFileData* result = info_[profile_index];
+  // DCHECK that profile info map key is consistent with the one stored in the dex file data.
+  // This should always be the case since the cache map is managed by ProfileCompilationInfo.
+  DCHECK_EQ(profile_key, result->profile_key);
+  DCHECK_EQ(profile_index, result->profile_index);
+
+  // Check that the checksum matches.
+  // This may differ if, for example, the dex file was updated and
+  // we had a record of the old one.
+  if (result->checksum != checksum) {
+    LOG(WARNING) << "Checksum mismatch for dex " << profile_key;
     return nullptr;
   }
-  return &info_it->second;
+  return result;
+}
+
+const ProfileCompilationInfo::DexFileData* ProfileCompilationInfo::FindDexData(
+      const std::string& profile_key) const {
+  const auto& profile_index_it = profile_key_map_.find(profile_key);
+  if (profile_index_it == profile_key_map_.end()) {
+    return nullptr;
+  }
+
+  uint8_t profile_index = profile_index_it->second;
+  const DexFileData* result = info_[profile_index];
+  DCHECK_EQ(profile_key, result->profile_key);
+  DCHECK_EQ(profile_index, result->profile_index);
+  return result;
 }
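
GetOrAddDexFileData and FindDexData now share a two-structure design: info_ owns the DexFileData entries in profile-index order, while profile_key_map_ is only a key-to-index cache. A minimal stand-in with standard containers (names, containers, and the missing 255-entry cap are all simplifications):

    #include <cstdint>
    #include <memory>
    #include <string>
    #include <unordered_map>
    #include <vector>

    struct Entry { std::string key; uint32_t checksum; };

    class IndexedStore {
     public:
      // Returns the entry for `key`, adding one if the key is new;
      // returns nullptr on a checksum mismatch, as the patch does.
      Entry* GetOrAdd(const std::string& key, uint32_t checksum) {
        auto it = key_to_index_.emplace(key, entries_.size()).first;
        if (it->second == entries_.size()) {  // key was newly inserted
          entries_.push_back(std::make_unique<Entry>(Entry{key, checksum}));
        }
        Entry* entry = entries_[it->second].get();
        return entry->checksum == checksum ? entry : nullptr;
      }
     private:
      std::vector<std::unique_ptr<Entry>> entries_;            // mirrors info_
      std::unordered_map<std::string, size_t> key_to_index_;   // mirrors profile_key_map_
    };
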
 
 bool ProfileCompilationInfo::AddResolvedClasses(const DexCacheResolvedClasses& classes) {
@@ -415,9 +445,7 @@
                                        uint32_t dex_checksum,
                                        uint16_t method_index,
                                        const OfflineProfileMethodInfo& pmi) {
-  DexFileData* const data = GetOrAddDexFileData(
-      GetProfileDexFileKey(dex_location),
-      dex_checksum);
+  DexFileData* const data = GetOrAddDexFileData(GetProfileDexFileKey(dex_location), dex_checksum);
   if (data == nullptr) {  // checksum mismatch
     return false;
   }
@@ -753,6 +781,8 @@
   return kProfileLoadSuccess;
 }
 
+// TODO(calin): Fix this API. ProfileCompilationInfo::Load should be static and
+// return a unique pointer to a ProfileCompilationInfo upon success.
 bool ProfileCompilationInfo::Load(int fd) {
   std::string error;
   ProfileLoadSatus status = LoadInternal(fd, &error);
@@ -770,6 +800,10 @@
   ScopedTrace trace(__PRETTY_FUNCTION__);
   DCHECK_GE(fd, 0);
 
+  if (!IsEmpty()) {
+    return kProfileLoadWouldOverwiteData;
+  }
+
   struct stat stat_buffer;
   if (fstat(fd, &stat_buffer) != 0) {
     return kProfileLoadIOError;
@@ -820,10 +854,10 @@
   // the current profile info.
   // Note that the number of elements should be very small, so this should not
   // be a performance issue.
-  for (const auto& other_it : other.info_) {
-    auto info_it = info_.find(other_it.first);
-    if ((info_it != info_.end()) && (info_it->second.checksum != other_it.second.checksum)) {
-      LOG(WARNING) << "Checksum mismatch for dex " << other_it.first;
+  for (const DexFileData* other_dex_data : other.info_) {
+    const DexFileData* dex_data = FindDexData(other_dex_data->profile_key);
+    if ((dex_data != nullptr) && (dex_data->checksum != other_dex_data->checksum)) {
+      LOG(WARNING) << "Checksum mismatch for dex " << other_dex_data->profile_key;
       return false;
     }
   }
@@ -840,32 +874,28 @@
   // First, build a mapping from other_dex_profile_index to this_dex_profile_index.
   // This will make sure that the ClassReferences  will point to the correct dex file.
   SafeMap<uint8_t, uint8_t> dex_profile_index_remap;
-  for (const auto& other_it : other.info_) {
-    const std::string& other_dex_location = other_it.first;
-    uint32_t other_checksum = other_it.second.checksum;
-    const DexFileData& other_dex_data = other_it.second;
-    const DexFileData* dex_data = GetOrAddDexFileData(other_dex_location, other_checksum);
+  for (const DexFileData* other_dex_data : other.info_) {
+    const DexFileData* dex_data = GetOrAddDexFileData(other_dex_data->profile_key,
+                                                      other_dex_data->checksum);
     if (dex_data == nullptr) {
       return false;  // Could happen if we exceed the number of allowed dex files.
     }
-    dex_profile_index_remap.Put(other_dex_data.profile_index, dex_data->profile_index);
+    dex_profile_index_remap.Put(other_dex_data->profile_index, dex_data->profile_index);
   }
 
   // Merge the actual profile data.
-  for (const auto& other_it : other.info_) {
-    const std::string& other_dex_location = other_it.first;
-    const DexFileData& other_dex_data = other_it.second;
-    auto info_it = info_.find(other_dex_location);
-    DCHECK(info_it != info_.end());
+  for (const DexFileData* other_dex_data : other.info_) {
+    DexFileData* dex_data = const_cast<DexFileData*>(FindDexData(other_dex_data->profile_key));
+    DCHECK(dex_data != nullptr);
 
     // Merge the classes.
-    info_it->second.class_set.insert(other_dex_data.class_set.begin(),
-                                     other_dex_data.class_set.end());
+    dex_data->class_set.insert(other_dex_data->class_set.begin(),
+                               other_dex_data->class_set.end());
 
     // Merge the methods and the inline caches.
-    for (const auto& other_method_it : other_dex_data.method_map) {
+    for (const auto& other_method_it : other_dex_data->method_map) {
       uint16_t other_method_index = other_method_it.first;
-      auto method_it = info_it->second.method_map.FindOrAdd(other_method_index);
+      auto method_it = dex_data->method_map.FindOrAdd(other_method_index);
       const auto& other_inline_cache = other_method_it.second;
       for (const auto& other_ic_it : other_inline_cache) {
         uint16_t other_dex_pc = other_ic_it.first;
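
This remap is what makes the different-dex-order merge in the new profman test work: inline-cache ClassReferences carry the source profile's dex index, so each one must be translated as it is copied. In miniature (values illustrative):

    // Suppose `other` wrote dex B at index 0 and dex A at index 1, while this
    // profile already holds A at index 0. GetOrAddDexFileData() then assigns B
    // index 1, so dex_profile_index_remap = {0 -> 1, 1 -> 0}, and every copied
    // class reference has its dex_profile_index rewritten through that map.
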
@@ -905,28 +935,18 @@
 ProfileCompilationInfo::FindMethod(const std::string& dex_location,
                                    uint32_t dex_checksum,
                                    uint16_t dex_method_index) const {
-  auto info_it = info_.find(GetProfileDexFileKey(dex_location));
-  if (info_it != info_.end()) {
-    if (!ChecksumMatch(dex_checksum, info_it->second.checksum)) {
+  const DexFileData* dex_data = FindDexData(GetProfileDexFileKey(dex_location));
+  if (dex_data != nullptr) {
+    if (!ChecksumMatch(dex_checksum, dex_data->checksum)) {
       return nullptr;
     }
-    const MethodMap& methods = info_it->second.method_map;
+    const MethodMap& methods = dex_data->method_map;
     const auto method_it = methods.find(dex_method_index);
     return method_it == methods.end() ? nullptr : &(method_it->second);
   }
   return nullptr;
 }
 
-void ProfileCompilationInfo::DexFileToProfileIndex(
-    /*out*/std::vector<DexReference>* dex_references) const {
-  dex_references->resize(info_.size());
-  for (const auto& info_it : info_) {
-    DexReference& dex_ref = (*dex_references)[info_it.second.profile_index];
-    dex_ref.dex_location = info_it.first;
-    dex_ref.dex_checksum = info_it.second.checksum;
-  }
-}
-
 bool ProfileCompilationInfo::GetMethod(const std::string& dex_location,
                                        uint32_t dex_checksum,
                                        uint16_t dex_method_index,
@@ -936,7 +956,12 @@
     return false;
   }
 
-  DexFileToProfileIndex(&pmi->dex_references);
+  pmi->dex_references.resize(info_.size());
+  for (const DexFileData* dex_data : info_) {
+    pmi->dex_references[dex_data->profile_index].dex_location = dex_data->profile_key;
+    pmi->dex_references[dex_data->profile_index].dex_checksum = dex_data->checksum;
+  }
+
   // TODO(calin): maybe expose a direct pointer to avoid copying
   pmi->inline_caches = *inline_caches;
   return true;
@@ -944,12 +969,12 @@
 
 
 bool ProfileCompilationInfo::ContainsClass(const DexFile& dex_file, dex::TypeIndex type_idx) const {
-  auto info_it = info_.find(GetProfileDexFileKey(dex_file.GetLocation()));
-  if (info_it != info_.end()) {
-    if (!ChecksumMatch(dex_file, info_it->second.checksum)) {
+  const DexFileData* dex_data = FindDexData(GetProfileDexFileKey(dex_file.GetLocation()));
+  if (dex_data != nullptr) {
+    if (!ChecksumMatch(dex_file, dex_data->checksum)) {
       return false;
     }
-    const std::set<dex::TypeIndex>& classes = info_it->second.class_set;
+    const std::set<dex::TypeIndex>& classes = dex_data->class_set;
     return classes.find(type_idx) != classes.end();
   }
   return false;
@@ -957,16 +982,16 @@
 
 uint32_t ProfileCompilationInfo::GetNumberOfMethods() const {
   uint32_t total = 0;
-  for (const auto& it : info_) {
-    total += it.second.method_map.size();
+  for (const DexFileData* dex_data : info_) {
+    total += dex_data->method_map.size();
   }
   return total;
 }
 
 uint32_t ProfileCompilationInfo::GetNumberOfResolvedClasses() const {
   uint32_t total = 0;
-  for (const auto& it : info_) {
-    total += it.second.class_set.size();
+  for (const DexFileData* dex_data : info_) {
+    total += dex_data->class_set.size();
   }
   return total;
 }
@@ -999,35 +1024,27 @@
   os << "ProfileInfo:";
 
   const std::string kFirstDexFileKeySubstitute = ":classes.dex";
-  // Write the entries in profile index order.
-  std::vector<const std::string*> ordered_info_location(info_.size());
-  std::vector<const DexFileData*> ordered_info_data(info_.size());
-  for (const auto& it : info_) {
-    ordered_info_location[it.second.profile_index] = &(it.first);
-    ordered_info_data[it.second.profile_index] = &(it.second);
-  }
-  for (size_t profile_index = 0; profile_index < info_.size(); profile_index++) {
+
+  for (const DexFileData* dex_data : info_) {
     os << "\n";
-    const std::string& location = *ordered_info_location[profile_index];
-    const DexFileData& dex_data = *ordered_info_data[profile_index];
     if (print_full_dex_location) {
-      os << location;
+      os << dex_data->profile_key;
     } else {
       // Replace the (empty) multidex suffix of the first key with a substitute for easier reading.
-      std::string multidex_suffix = DexFile::GetMultiDexSuffix(location);
+      std::string multidex_suffix = DexFile::GetMultiDexSuffix(dex_data->profile_key);
       os << (multidex_suffix.empty() ? kFirstDexFileKeySubstitute : multidex_suffix);
     }
-    os << " [index=" << static_cast<uint32_t>(dex_data.profile_index) << "]";
+    os << " [index=" << static_cast<uint32_t>(dex_data->profile_index) << "]";
     const DexFile* dex_file = nullptr;
     if (dex_files != nullptr) {
       for (size_t i = 0; i < dex_files->size(); i++) {
-        if (location == (*dex_files)[i]->GetLocation()) {
+        if (dex_data->profile_key == (*dex_files)[i]->GetLocation()) {
           dex_file = (*dex_files)[i];
         }
       }
     }
     os << "\n\tmethods: ";
-    for (const auto& method_it : dex_data.method_map) {
+    for (const auto& method_it : dex_data->method_map) {
       if (dex_file != nullptr) {
         os << "\n\t\t" << dex_file->PrettyMethod(method_it.first, true);
       } else {
@@ -1052,7 +1069,7 @@
       os << "], ";
     }
     os << "\n\tclasses: ";
-    for (const auto class_it : dex_data.class_set) {
+    for (const auto class_it : dex_data->class_set) {
       if (dex_file != nullptr) {
         os << "\n\t\t" << dex_file->PrettyType(class_it);
       } else {
@@ -1076,19 +1093,17 @@
   if (info_.empty()) {
     return;
   }
-  for (const auto& it : info_) {
-    const std::string& location = it.first;
-    const DexFileData& dex_data = it.second;
+  for (const DexFileData* dex_data : info_) {
     const DexFile* dex_file = nullptr;
     if (dex_files != nullptr) {
       for (size_t i = 0; i < dex_files->size(); i++) {
-        if (location == GetProfileDexFileKey((*dex_files)[i]->GetLocation()) &&
-            dex_data.checksum == (*dex_files)[i]->GetLocationChecksum()) {
+        if (dex_data->profile_key == GetProfileDexFileKey((*dex_files)[i]->GetLocation()) &&
+            dex_data->checksum == (*dex_files)[i]->GetLocationChecksum()) {
           dex_file = (*dex_files)[i];
         }
       }
     }
-    for (const auto class_it : dex_data.class_set) {
+    for (const auto class_it : dex_data->class_set) {
       if (dex_file != nullptr) {
         class_names->insert(std::string(dex_file->PrettyType(class_it)));
       }
@@ -1097,7 +1112,19 @@
 }
 
 bool ProfileCompilationInfo::Equals(const ProfileCompilationInfo& other) {
-  return info_.Equals(other.info_);
+  // No need to compare profile_key_map_. That's only a cache for fast search.
+  // All the information is already in the info_ vector.
+  if (info_.size() != other.info_.size()) {
+    return false;
+  }
+  for (size_t i = 0; i < info_.size(); i++) {
+    const DexFileData& dex_data = *info_[i];
+    const DexFileData& other_dex_data = *other.info_[i];
+    if (!(dex_data == other_dex_data)) {
+      return false;
+    }
+  }
+  return true;
 }
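
One consequence of the element-wise comparison: Equals is sensitive to profile-index assignment, not just to content (a reading of the code above, not a documented guarantee). That is why the MergeProfilesWithDifferentDexOrder test builds its expected profile by merging in the same order the code under test does:

    // Illustrative: a and b hold identical per-dex data, but a added dex A
    // first (index 0) while b added dex B first; a.Equals(b) is false
    // because info_[0] differs between them.
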
 
 std::set<DexCacheResolvedClasses> ProfileCompilationInfo::GetResolvedClasses(
@@ -1107,13 +1134,11 @@
     key_to_location_map.emplace(GetProfileDexFileKey(location), location);
   }
   std::set<DexCacheResolvedClasses> ret;
-  for (auto&& pair : info_) {
-    const std::string& profile_key = pair.first;
-    auto it = key_to_location_map.find(profile_key);
+  for (const DexFileData* dex_data : info_) {
+    const auto& it = key_to_location_map.find(dex_data->profile_key);
     if (it != key_to_location_map.end()) {
-      const DexFileData& data = pair.second;
-      DexCacheResolvedClasses classes(it->second, it->second, data.checksum);
-      classes.AddClasses(data.class_set.begin(), data.class_set.end());
+      DexCacheResolvedClasses classes(it->second, it->second, dex_data->checksum);
+      classes.AddClasses(dex_data->class_set.begin(), dex_data->class_set.end());
       ret.insert(classes);
     }
   }
@@ -1121,8 +1146,8 @@
 }
 
 void ProfileCompilationInfo::ClearResolvedClasses() {
-  for (auto& pair : info_) {
-    pair.second.class_set.clear();
+  for (DexFileData* dex_data : info_) {
+    dex_data->class_set.clear();
   }
 }
 
@@ -1130,7 +1155,8 @@
 bool ProfileCompilationInfo::GenerateTestProfile(int fd,
                                                  uint16_t number_of_dex_files,
                                                  uint16_t method_ratio,
-                                                 uint16_t class_ratio) {
+                                                 uint16_t class_ratio,
+                                                 uint32_t random_seed) {
   const std::string base_dex_location = "base.apk";
   ProfileCompilationInfo info;
   // The limits are defined by the dex specification.
@@ -1139,7 +1165,7 @@
   uint16_t number_of_methods = max_method * method_ratio / 100;
   uint16_t number_of_classes = max_classes * class_ratio / 100;
 
-  srand(MicroTime());
+  std::srand(random_seed);
 
   // Make sure we generate more samples with a low index value.
   // This makes it more likely to hit valid method/class indices in small apps.
@@ -1169,6 +1195,32 @@
   return info.Save(fd);
 }
 
+// Naive implementation to generate a random profile file suitable for testing.
+bool ProfileCompilationInfo::GenerateTestProfile(
+    int fd,
+    std::vector<std::unique_ptr<const DexFile>>& dex_files,
+    uint32_t random_seed) {
+  std::srand(random_seed);
+  ProfileCompilationInfo info;
+  for (std::unique_ptr<const DexFile>& dex_file : dex_files) {
+    const std::string& location = dex_file->GetLocation();
+    uint32_t checksum = dex_file->GetLocationChecksum();
+    for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+      // Randomly add a class from the dex file (with 50% chance).
+      if (std::rand() % 2 != 0) {
+        info.AddClassIndex(location, checksum, dex::TypeIndex(dex_file->GetClassDef(i).class_idx_));
+      }
+    }
+    for (uint32_t i = 0; i < dex_file->NumMethodIds(); ++i) {
+      // Randomly add a method from the dex file (with 50% chance).
+      if (std::rand() % 2 != 0) {
+        info.AddMethodIndex(location, checksum, i);
+      }
+    }
+  }
+  return info.Save(fd);
+}
+
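+
A hedged usage sketch for this overload, mirroring the profman path above (OpenDexFiles() stands in for however the caller obtains its dex files; the open() flags match the ones profman uses):

    #include <fcntl.h>
    #include <unistd.h>

    std::vector<std::unique_ptr<const DexFile>> dex_files = OpenDexFiles();
    int fd = open("/tmp/test.prof", O_CREAT | O_TRUNC | O_WRONLY, 0644);
    bool ok = ProfileCompilationInfo::GenerateTestProfile(fd, dex_files, /*random_seed*/ 42u);
    close(fd);  // like the profman caller, ignore the close result
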
 bool ProfileCompilationInfo::OfflineProfileMethodInfo::operator==(
       const OfflineProfileMethodInfo& other) const {
   if (inline_caches.size() != other.inline_caches.size()) {
@@ -1210,4 +1262,17 @@
   return true;
 }
 
+void ProfileCompilationInfo::ClearProfile() {
+  for (DexFileData* dex_data : info_) {
+    delete dex_data;
+  }
+  info_.clear();
+  profile_key_map_.clear();
+}
+
+bool ProfileCompilationInfo::IsEmpty() const {
+  DCHECK_EQ(info_.empty(), profile_key_map_.empty());
+  return info_.empty();
+}
+
 }  // namespace art
diff --git a/runtime/jit/profile_compilation_info.h b/runtime/jit/profile_compilation_info.h
index 451d53e..87f7636 100644
--- a/runtime/jit/profile_compilation_info.h
+++ b/runtime/jit/profile_compilation_info.h
@@ -181,11 +181,16 @@
 
   // Public methods to create, extend or query the profile.
 
+  ProfileCompilationInfo() {}
+  ProfileCompilationInfo(const ProfileCompilationInfo& pci);
+  ~ProfileCompilationInfo();
+
   // Add the given methods and classes to the current profile object.
   bool AddMethodsAndClasses(const std::vector<ProfileMethodInfo>& methods,
                             const std::set<DexCacheResolvedClasses>& resolved_classes);
 
   // Load profile information from the given file descriptor.
+  // If the current profile is non-empty, the load will fail.
   bool Load(int fd);
 
   // Merge the data from another ProfileCompilationInfo into the current object.
@@ -253,7 +258,14 @@
   static bool GenerateTestProfile(int fd,
                                   uint16_t number_of_dex_files,
                                   uint16_t method_ratio,
-                                  uint16_t class_ratio);
+                                  uint16_t class_ratio,
+                                  uint32_t random_seed);
+
+  // Generate a test profile which will randomly contain classes and methods from
+  // the provided list of dex files.
+  static bool GenerateTestProfile(int fd,
+                                  std::vector<std::unique_ptr<const DexFile>>& dex_files,
+                                  uint32_t random_seed);
 
   // Check that the given profile method info contain the same data.
   static bool Equals(const ProfileCompilationInfo::OfflineProfileMethodInfo& pmi1,
@@ -261,6 +273,7 @@
 
  private:
   enum ProfileLoadSatus {
+    kProfileLoadWouldOverwiteData,
     kProfileLoadIOError,
     kProfileLoadVersionMismatch,
     kProfileLoadBadData,
@@ -271,14 +284,21 @@
   using MethodMap = SafeMap<uint16_t, InlineCacheMap>;
 
   // Internal representation of the profile information belonging to a dex file.
+  // Note that we could do without profile_key (the key used to encode the dex
+  // file in the profile) and profile_index (the index of the dex file in the
+  // profile) fields in this struct because we can infer them from
+  // profile_key_map_ and info_. However, it makes the profile logic much
+  // simpler if we have references here as well.
   struct DexFileData {
-    DexFileData(uint32_t location_checksum, uint16_t index)
-         : profile_index(index), checksum(location_checksum) {}
-    // The profile index of this dex file (matches ClassReference#dex_profile_index)
+    DexFileData(const std::string& key, uint32_t location_checksum, uint16_t index)
+         : profile_key(key), profile_index(index), checksum(location_checksum) {}
+    // The profile key this data belongs to.
+    std::string profile_key;
+    // The profile index of this dex file (matches ClassReference#dex_profile_index).
     uint8_t profile_index;
-    // The dex checksum
+    // The dex checksum.
     uint32_t checksum;
-    // The methonds' profile information
+    // The methods' profile information.
     MethodMap method_map;
     // The classes which have been profiled. Note that these don't necessarily include
     // all the classes that can be found in the inline caches reference.
@@ -289,12 +309,9 @@
     }
   };
 
-  // Maps dex file to their profile information.
-  using DexFileToProfileInfoMap = SafeMap<const std::string, DexFileData>;
-
-  // Return the profile data for the given dex location or null if the dex location
+  // Return the profile data for the given profile key, or null if the key
   // already exists but has a different checksum
-  DexFileData* GetOrAddDexFileData(const std::string& dex_location, uint32_t checksum);
+  DexFileData* GetOrAddDexFileData(const std::string& profile_key, uint32_t checksum);
 
   // Add a method index to the profile (without inline caches).
   bool AddMethodIndex(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
@@ -325,6 +342,16 @@
   // be the same as the profile index of the dex file (used to encode the ClassReferences).
   void DexFileToProfileIndex(/*out*/std::vector<DexReference>* dex_references) const;
 
+  // Return the dex data associated with the given profile key or null if the profile
+  // doesn't contain the key.
+  const DexFileData* FindDexData(const std::string& profile_key) const;
+
+  // Clear all the profile data.
+  void ClearProfile();
+
+  // Checks if the profile is empty.
+  bool IsEmpty() const;
+
   // Parsing functionality.
 
   // The information present in the header of each profile line.
@@ -431,7 +458,15 @@
   friend class ProfileAssistantTest;
   friend class Dex2oatLayoutTest;
 
-  DexFileToProfileInfoMap info_;
+  // Vector containing the actual profile info.
+  // The vector index is the profile index of the dex data and
+  // matched DexFileData::profile_index.
+  std::vector<DexFileData*> info_;
+
+  // Cache mapping profile keys to profile index.
+  // This is used to speed up searches since it avoids iterating
+  // over the info_ vector when searching by profile key.
+  SafeMap<const std::string, uint8_t> profile_key_map_;
 };
 
 }  // namespace art
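
To illustrate the new two-level layout: a profile key resolves to an index through profile_key_map_, which then addresses info_ directly. A hedged sketch of what FindDexData can look like under this scheme (the real body lives in profile_compilation_info.cc; this is not a verbatim copy):

  const ProfileCompilationInfo::DexFileData* ProfileCompilationInfo::FindDexData(
      const std::string& profile_key) const {
    // Map lookup avoids scanning the info_ vector.
    auto it = profile_key_map_.find(profile_key);
    if (it == profile_key_map_.end()) {
      return nullptr;
    }
    // The cached index is the position in info_ and, by construction,
    // equals DexFileData::profile_index.
    const DexFileData* data = info_[it->second];
    DCHECK_EQ(data->profile_key, profile_key);
    DCHECK_EQ(data->profile_index, it->second);
    return data;
  }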
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
index 5cd8e8f..c9f2d0e 100644
--- a/runtime/jit/profile_compilation_info_test.cc
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -195,7 +195,7 @@
       dex_pc_data.AddClass(1, dex::TypeIndex(1));
       dex_pc_data.AddClass(2, dex::TypeIndex(2));
 
-       pmi.inline_caches.Put(dex_pc, dex_pc_data);
+      pmi.inline_caches.Put(dex_pc, dex_pc_data);
     }
     // Megamorphic
     for (uint16_t dex_pc = 22; dex_pc < 33; dex_pc++) {
@@ -787,4 +787,26 @@
   ASSERT_TRUE(info_no_inline_cache.Save(GetFd(profile)));
 }
 
+TEST_F(ProfileCompilationInfoTest, LoadShouldClearExistingDataFromProfiles) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  // Save a few methods.
+  for (uint16_t i = 0; i < 10; i++) {
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+
+  // Add a bunch of methods to test_info.
+  ProfileCompilationInfo test_info;
+  for (uint16_t i = 0; i < 10; i++) {
+    ASSERT_TRUE(AddMethod("dex_location2", /* checksum */ 2, /* method_idx */ i, &test_info));
+  }
+
+  // Attempt to load the saved profile into test_info.
+  // This should fail since test_info already contains data and the load would overwrite it.
+  ASSERT_FALSE(test_info.Load(GetFd(profile)));
+}
 }  // namespace art
diff --git a/runtime/method_handles.cc b/runtime/method_handles.cc
index 58c5d17..bd7c4ad 100644
--- a/runtime/method_handles.cc
+++ b/runtime/method_handles.cc
@@ -49,10 +49,19 @@
 bool GetUnboxedPrimitiveType(ObjPtr<mirror::Class> klass, Primitive::Type* type)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedAssertNoThreadSuspension ants(__FUNCTION__);
-#define LOOKUP_PRIMITIVE(primitive, _, __, ___)                         \
-  if (klass->DescriptorEquals(Primitive::BoxedDescriptor(primitive))) { \
-    *type = primitive;                                                  \
-    return true;                                                        \
+  std::string storage;
+  const char* descriptor = klass->GetDescriptor(&storage);
+  static const char kJavaLangPrefix[] = "Ljava/lang/";
+  static const size_t kJavaLangPrefixSize = sizeof(kJavaLangPrefix) - 1;
+  if (strncmp(descriptor, kJavaLangPrefix, kJavaLangPrefixSize) != 0) {
+    return false;
+  }
+
+  descriptor += kJavaLangPrefixSize;
+#define LOOKUP_PRIMITIVE(primitive, _, java_name, ___) \
+  if (strcmp(descriptor, #java_name ";") == 0) {       \
+    *type = primitive;                                 \
+    return true;                                       \
   }
 
   PRIMITIVES_LIST(LOOKUP_PRIMITIVE);
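
A worked trace of the rewritten lookup, assuming the PRIMITIVES_LIST entry for kPrimInt carries Integer as its java_name:

  // klass = java.lang.Integer  =>  descriptor "Ljava/lang/Integer;".
  //   The 11-character "Ljava/lang/" prefix matches and is skipped, leaving
  //   "Integer;"; the expanded arm strcmp("Integer;", "Integer" ";")
  //   succeeds, so *type = Primitive::kPrimInt and we return true.
  // klass = java.util.List     =>  descriptor "Ljava/util/List;".
  //   The prefix check fails, so we return false before reaching any of
  //   the PRIMITIVES_LIST comparisons.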
@@ -141,21 +150,23 @@
     if (from->DescriptorEquals("Ljava/lang/Object;")) {
       // Object might be converted into a primitive during unboxing.
       return true;
-    } else if (Primitive::IsNumericType(to_primitive) &&
-               from->DescriptorEquals("Ljava/lang/Number;")) {
+    }
+
+    if (Primitive::IsNumericType(to_primitive) && from->DescriptorEquals("Ljava/lang/Number;")) {
       // Number might be unboxed into any of the number primitive types.
       return true;
     }
+
     Primitive::Type unboxed_type;
     if (GetUnboxedPrimitiveType(from, &unboxed_type)) {
       if (unboxed_type == to_primitive) {
         // Straightforward unboxing conversion such as Boolean => boolean.
         return true;
-      } else {
-        // Check if widening operations for numeric primitives would work,
-        // such as Byte => byte => long.
-        return Primitive::IsWidenable(unboxed_type, to_primitive);
       }
+
+      // Check if widening operations for numeric primitives would work,
+      // such as Byte => byte => long.
+      return Primitive::IsWidenable(unboxed_type, to_primitive);
     }
   }
 
@@ -372,25 +383,18 @@
 static inline size_t GetInsForProxyOrNativeMethod(ArtMethod* method)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK(method->IsNative() || method->IsProxyMethod());
-
   method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
-  size_t num_ins = 0;
-  // Separate accounting for the receiver, which isn't a part of the
-  // shorty.
-  if (!method->IsStatic()) {
-    ++num_ins;
-  }
+  uint32_t shorty_length = 0;
+  const char* shorty = method->GetShorty(&shorty_length);
 
-  uint32_t shorty_len = 0;
-  const char* shorty = method->GetShorty(&shorty_len);
-  for (size_t i = 1; i < shorty_len; ++i) {
-    const char c = shorty[i];
-    ++num_ins;
-    if (c == 'J' || c == 'D') {
+  // The shorty includes the return type but not the receiver. For static
+  // methods the number of ins is therefore shorty_length - 1; for instance
+  // methods the return-type slot stands in for the implicit receiver.
+  size_t num_ins = method->IsStatic() ? shorty_length - 1 : shorty_length;
+  for (const char* c = shorty + 1; *c != '\0'; ++c) {
+    if (*c == 'J' || *c == 'D') {
       ++num_ins;
     }
   }
-
   return num_ins;
 }
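
A worked example of the shorty arithmetic above (shorty value invented for illustration):

  // shorty "LJI": returns Object ('L') and takes (long, int); shorty_length = 3.
  // Static method:   num_ins = 3 - 1 = 2 parameter slots; the loop adds one
  //                  more vreg for the wide 'J'              -> num_ins = 3.
  // Instance method: num_ins = 3, the return-type slot standing in for the
  //                  implicit receiver; plus one for 'J'     -> num_ins = 4.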
 
@@ -402,7 +406,10 @@
   ObjPtr<mirror::ObjectArray<mirror::Class>> param_types(callsite_type->GetPTypes());
   if (param_types->GetLength() == 1) {
     ObjPtr<mirror::Class> param(param_types->GetWithoutChecks(0));
-    return param == WellKnownClasses::ToClass(WellKnownClasses::dalvik_system_EmulatedStackFrame);
+    // NB: Comparing the descriptor here, as it appears faster in cycle simulation than using:
+    //   param == WellKnownClasses::ToClass(WellKnownClasses::dalvik_system_EmulatedStackFrame)
+    // Costs are 98 vs 173 cycles per invocation.
+    return param->DescriptorEquals("Ldalvik/system/EmulatedStackFrame;");
   }
 
   return false;
@@ -416,35 +423,8 @@
                                      ShadowFrame& shadow_frame,
                                      const uint32_t (&args)[Instruction::kMaxVarArgRegs],
                                      uint32_t first_arg,
-                                     JValue* result,
-                                     const mirror::MethodHandle::Kind handle_kind)
+                                     JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  // For virtual and interface methods ensure called_method points to
-  // the actual method to invoke.
-  if (handle_kind == mirror::MethodHandle::Kind::kInvokeVirtual ||
-      handle_kind == mirror::MethodHandle::Kind::kInvokeInterface) {
-    uint32_t receiver_reg = is_range ? first_arg : args[0];
-    ObjPtr<mirror::Object> receiver(shadow_frame.GetVRegReference(receiver_reg));
-    if (IsCallerTransformer(callsite_type)) {
-      // The current receiver is an emulated stack frame, the method's
-      // receiver needs to be fetched from there as the emulated frame
-      // will be unpacked into a new frame.
-      receiver = ObjPtr<mirror::EmulatedStackFrame>::DownCast(receiver)->GetReceiver();
-    }
-
-    ObjPtr<mirror::Class> declaring_class(called_method->GetDeclaringClass());
-    if (receiver == nullptr || receiver->GetClass() != declaring_class) {
-      // Verify that _vRegC is an object reference and of the type expected by
-      // the receiver.
-      if (!VerifyObjectIsClass(receiver, declaring_class)) {
-        DCHECK(self->IsExceptionPending());
-        return false;
-      }
-      called_method = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(
-          called_method, kRuntimePointerSize);
-    }
-  }
-
   // Compute method information.
   const DexFile::CodeItem* code_item = called_method->GetCodeItem();
 
@@ -513,17 +493,23 @@
           result->SetL(0);
           return false;
         }
-      } else if (!ConvertAndCopyArgumentsFromCallerFrame<is_range>(self,
-                                                                   callsite_type,
-                                                                   target_type,
-                                                                   shadow_frame,
-                                                                   args,
-                                                                   first_arg,
-                                                                   first_dest_reg,
-                                                                   new_shadow_frame)) {
-        DCHECK(self->IsExceptionPending());
-        result->SetL(0);
-        return false;
+      } else {
+        if (!callsite_type->IsConvertible(target_type.Get())) {
+          ThrowWrongMethodTypeException(target_type.Get(), callsite_type.Get());
+          return false;
+        }
+        if (!ConvertAndCopyArgumentsFromCallerFrame<is_range>(self,
+                                                              callsite_type,
+                                                              target_type,
+                                                              shadow_frame,
+                                                              args,
+                                                              first_arg,
+                                                              first_dest_reg,
+                                                              new_shadow_frame)) {
+          DCHECK(self->IsExceptionPending());
+          result->SetL(0);
+          return false;
+        }
       }
     }
   }
@@ -548,13 +534,13 @@
     if (ConvertReturnValue(emulated_stack_type, target_type, &local_result)) {
       emulated_stack_frame->SetReturnValue(self, local_result);
       return true;
-    } else {
-      DCHECK(self->IsExceptionPending());
-      return false;
     }
-  } else {
-    return ConvertReturnValue(callsite_type, target_type, result);
+
+    DCHECK(self->IsExceptionPending());
+    return false;
   }
+
+  return ConvertReturnValue(callsite_type, target_type, result);
 }
 
 template <bool is_range>
@@ -650,98 +636,130 @@
   return klass;
 }
 
+ArtMethod* RefineTargetMethod(Thread* self,
+                              ShadowFrame& shadow_frame,
+                              const mirror::MethodHandle::Kind& handle_kind,
+                              Handle<mirror::MethodType> handle_type,
+                              Handle<mirror::MethodType> callsite_type,
+                              const uint32_t receiver_reg,
+                              ArtMethod* target_method)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (handle_kind == mirror::MethodHandle::Kind::kInvokeVirtual ||
+      handle_kind == mirror::MethodHandle::Kind::kInvokeInterface) {
+    // For virtual and interface methods ensure target_method points to
+    // the actual method to invoke.
+    ObjPtr<mirror::Object> receiver(shadow_frame.GetVRegReference(receiver_reg));
+    if (IsCallerTransformer(callsite_type)) {
+      // The current receiver is an emulated stack frame, the method's
+      // receiver needs to be fetched from there as the emulated frame
+      // will be unpacked into a new frame.
+      receiver = ObjPtr<mirror::EmulatedStackFrame>::DownCast(receiver)->GetReceiver();
+    }
+
+    ObjPtr<mirror::Class> declaring_class(target_method->GetDeclaringClass());
+    if (receiver == nullptr || receiver->GetClass() != declaring_class) {
+      // Verify that _vRegC is an object reference and of the type expected by
+      // the receiver.
+      if (!VerifyObjectIsClass(receiver, declaring_class)) {
+        DCHECK(self->IsExceptionPending());
+        return nullptr;
+      }
+      return receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(
+          target_method, kRuntimePointerSize);
+    }
+  } else if (handle_kind == mirror::MethodHandle::Kind::kInvokeDirect) {
+    // String constructors are a special case, they are replaced with
+    // StringFactory methods.
+    if (target_method->IsConstructor() && target_method->GetDeclaringClass()->IsStringClass()) {
+      DCHECK(handle_type->GetRType()->IsStringClass());
+      return WellKnownClasses::StringInitToStringFactory(target_method);
+    }
+  } else if (handle_kind == mirror::MethodHandle::Kind::kInvokeSuper) {
+    ObjPtr<mirror::Class> declaring_class = target_method->GetDeclaringClass();
+
+    // Note that we're not dynamically dispatching on the type of the receiver
+    // here. We use the static type of the "receiver" object that we've
+    // recorded in the method handle's type, which will be the same as the
+    // special caller that was specified at the point of lookup.
+    ObjPtr<mirror::Class> referrer_class = handle_type->GetPTypes()->Get(0);
+    if (!declaring_class->IsInterface()) {
+      ObjPtr<mirror::Class> super_class = referrer_class->GetSuperClass();
+      uint16_t vtable_index = target_method->GetMethodIndex();
+      DCHECK(super_class != nullptr);
+      DCHECK(super_class->HasVTable());
+      // Note that super_class is a super of referrer_class and target_method
+      // will always be declared by super_class (or one of its super classes).
+      DCHECK_LT(vtable_index, super_class->GetVTableLength());
+      return super_class->GetVTableEntry(vtable_index, kRuntimePointerSize);
+    } else {
+      return referrer_class->FindVirtualMethodForInterfaceSuper(target_method, kRuntimePointerSize);
+    }
+  }
+  return target_method;
+}
+
 template <bool is_range>
-bool DoInvokePolymorphicUnchecked(Thread* self,
-                                  ShadowFrame& shadow_frame,
-                                  Handle<mirror::MethodHandle> method_handle,
-                                  Handle<mirror::MethodType> callsite_type,
-                                  const uint32_t (&args)[Instruction::kMaxVarArgRegs],
-                                  uint32_t first_arg,
-                                  JValue* result)
+bool DoInvokePolymorphicMethod(Thread* self,
+                               ShadowFrame& shadow_frame,
+                               Handle<mirror::MethodHandle> method_handle,
+                               Handle<mirror::MethodType> callsite_type,
+                               const uint32_t (&args)[Instruction::kMaxVarArgRegs],
+                               uint32_t first_arg,
+                               JValue* result)
   REQUIRES_SHARED(Locks::mutator_lock_) {
   StackHandleScope<1> hs(self);
   Handle<mirror::MethodType> handle_type(hs.NewHandle(method_handle->GetMethodType()));
   const mirror::MethodHandle::Kind handle_kind = method_handle->GetHandleKind();
-  if (IsInvoke(handle_kind)) {
-    // Get the method we're actually invoking along with the kind of
-    // invoke that is desired. We don't need to perform access checks at this
-    // point because they would have been performed on our behalf at the point
-    // of creation of the method handle.
-    ArtMethod* called_method = method_handle->GetTargetMethod();
-    CHECK(called_method != nullptr);
+  DCHECK(IsInvoke(handle_kind));
 
-    if (handle_kind == mirror::MethodHandle::Kind::kInvokeVirtual ||
-        handle_kind == mirror::MethodHandle::Kind::kInvokeInterface) {
-      // TODO: Unfortunately, we have to postpone dynamic receiver based checks
-      // because the receiver might be cast or might come from an emulated stack
-      // frame, which means that it is unknown at this point. We perform these
-      // checks inside DoCallPolymorphic right before we do the actual invoke.
-    } else if (handle_kind == mirror::MethodHandle::Kind::kInvokeDirect) {
-      // String constructors are a special case, they are replaced with StringFactory
-      // methods.
-      if (called_method->IsConstructor() && called_method->GetDeclaringClass()->IsStringClass()) {
-        DCHECK(handle_type->GetRType()->IsStringClass());
-        called_method = WellKnownClasses::StringInitToStringFactory(called_method);
-      }
-    } else if (handle_kind == mirror::MethodHandle::Kind::kInvokeSuper) {
-      ObjPtr<mirror::Class> declaring_class = called_method->GetDeclaringClass();
+  // Get the method we're actually invoking along with the kind of
+  // invoke that is desired. We don't need to perform access checks at this
+  // point because they would have been performed on our behalf at the point
+  // of creation of the method handle.
+  ArtMethod* target_method = method_handle->GetTargetMethod();
+  uint32_t receiver_reg = is_range ? first_arg : args[0];
+  ArtMethod* called_method = RefineTargetMethod(self,
+                                                shadow_frame,
+                                                handle_kind,
+                                                handle_type,
+                                                callsite_type,
+                                                receiver_reg,
+                                                target_method);
+  if (called_method == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    return false;
+  }
 
-      // Note that we're not dynamically dispatching on the type of the receiver
-      // here. We use the static type of the "receiver" object that we've
-      // recorded in the method handle's type, which will be the same as the
-      // special caller that was specified at the point of lookup.
-      ObjPtr<mirror::Class> referrer_class = handle_type->GetPTypes()->Get(0);
-      if (!declaring_class->IsInterface()) {
-        ObjPtr<mirror::Class> super_class = referrer_class->GetSuperClass();
-        uint16_t vtable_index = called_method->GetMethodIndex();
-        DCHECK(super_class != nullptr);
-        DCHECK(super_class->HasVTable());
-        // Note that super_class is a super of referrer_class and called_method
-        // will always be declared by super_class (or one of its super classes).
-        DCHECK_LT(vtable_index, super_class->GetVTableLength());
-        called_method = super_class->GetVTableEntry(vtable_index, kRuntimePointerSize);
-      } else {
-        called_method = referrer_class->FindVirtualMethodForInterfaceSuper(
-            called_method, kRuntimePointerSize);
-      }
-      CHECK(called_method != nullptr);
-    }
-    if (IsInvokeTransform(handle_kind)) {
-      // There are two cases here - method handles representing regular
-      // transforms and those representing call site transforms. Method
-      // handles for call site transforms adapt their MethodType to match
-      // the call site. For these, the |callee_type| is the same as the
-      // |callsite_type|. The VarargsCollector is such a tranform, its
-      // method type depends on the call site, ie. x(a) or x(a, b), or
-      // x(a, b, c). The VarargsCollector invokes a variable arity method
-      // with the arity arguments in an array.
-      Handle<mirror::MethodType> callee_type =
-          (handle_kind == mirror::MethodHandle::Kind::kInvokeCallSiteTransform) ? callsite_type
-          : handle_type;
-      return DoCallTransform<is_range>(called_method,
+  if (IsInvokeTransform(handle_kind)) {
+    // There are two cases here - method handles representing regular
+    // transforms and those representing call site transforms. Method
+    // handles for call site transforms adapt their MethodType to match
+    // the call site. For these, the |callee_type| is the same as the
+    // |callsite_type|. The VarargsCollector is such a transform: its
+    // method type depends on the call site, i.e. x(a) or x(a, b), or
+    // x(a, b, c). The VarargsCollector invokes a variable arity method
+    // with the arity arguments in an array.
+    Handle<mirror::MethodType> callee_type =
+        (handle_kind == mirror::MethodHandle::Kind::kInvokeCallSiteTransform) ? callsite_type
+        : handle_type;
+    return DoCallTransform<is_range>(called_method,
+                                     callsite_type,
+                                     callee_type,
+                                     self,
+                                     shadow_frame,
+                                     method_handle /* receiver */,
+                                     args,
+                                     first_arg,
+                                     result);
+  } else {
+    return DoCallPolymorphic<is_range>(called_method,
                                        callsite_type,
-                                       callee_type,
+                                       handle_type,
                                        self,
                                        shadow_frame,
-                                       method_handle /* receiver */,
                                        args,
                                        first_arg,
                                        result);
-
-    } else {
-      return DoCallPolymorphic<is_range>(called_method,
-                                         callsite_type,
-                                         handle_type,
-                                         self,
-                                         shadow_frame,
-                                         args,
-                                         first_arg,
-                                         result,
-                                         handle_kind);
-    }
-  } else {
-    LOG(FATAL) << "Unreachable: " << handle_kind;
-    UNREACHABLE();
   }
 }
 
@@ -948,55 +966,30 @@
   ObjPtr<mirror::MethodType> handle_type(method_handle->GetMethodType());
   CHECK(handle_type != nullptr);
 
-  if (!IsInvokeTransform(handle_kind)) {
-    if (UNLIKELY(!IsCallerTransformer(callsite_type) &&
-                 !callsite_type->IsConvertible(handle_type.Ptr()))) {
+  if (IsFieldAccess(handle_kind)) {
+    DCHECK(!callsite_type->IsExactMatch(handle_type.Ptr()));
+    if (!callsite_type->IsConvertible(handle_type.Ptr())) {
       ThrowWrongMethodTypeException(handle_type.Ptr(), callsite_type.Get());
       return false;
     }
+    const bool do_convert = true;
+    return DoInvokePolymorphicFieldAccess<is_range, do_convert>(
+        self,
+        shadow_frame,
+        method_handle,
+        callsite_type,
+        args,
+        first_arg,
+        result);
   }
 
-  if (IsFieldAccess(handle_kind)) {
-    if (UNLIKELY(callsite_type->IsExactMatch(handle_type.Ptr()))) {
-      const bool do_convert = false;
-      return DoInvokePolymorphicFieldAccess<is_range, do_convert>(
-          self,
-          shadow_frame,
-          method_handle,
-          callsite_type,
-          args,
-          first_arg,
-          result);
-    } else {
-      const bool do_convert = true;
-      return DoInvokePolymorphicFieldAccess<is_range, do_convert>(
-          self,
-          shadow_frame,
-          method_handle,
-          callsite_type,
-          args,
-          first_arg,
-          result);
-    }
-  }
-
-  if (UNLIKELY(callsite_type->IsExactMatch(handle_type.Ptr()))) {
-    return DoInvokePolymorphicUnchecked<is_range>(self,
-                                                  shadow_frame,
-                                                  method_handle,
-                                                  callsite_type,
-                                                  args,
-                                                  first_arg,
-                                                  result);
-  } else {
-    return DoInvokePolymorphicUnchecked<is_range>(self,
-                                                  shadow_frame,
-                                                  method_handle,
-                                                  callsite_type,
-                                                  args,
-                                                  first_arg,
-                                                  result);
-  }
+  return DoInvokePolymorphicMethod<is_range>(self,
+                                             shadow_frame,
+                                             method_handle,
+                                             callsite_type,
+                                             args,
+                                             first_arg,
+                                             result);
 }
 
 template <bool is_range>
@@ -1008,32 +1001,9 @@
                               uint32_t first_arg,
                               JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  // We need to check the nominal type of the handle in addition to the
-  // real type. The "nominal" type is present when MethodHandle.asType is
-  // called any handle, and results in the declared type of the handle
-  // changing.
-  ObjPtr<mirror::MethodType> nominal_type(method_handle->GetNominalType());
-  if (UNLIKELY(nominal_type != nullptr)) {
-    if (UNLIKELY(!callsite_type->IsExactMatch(nominal_type.Ptr()))) {
-      ThrowWrongMethodTypeException(nominal_type.Ptr(), callsite_type.Get());
-      return false;
-    }
-    return DoInvokePolymorphicNonExact<is_range>(self,
-                                                 shadow_frame,
-                                                 method_handle,
-                                                 callsite_type,
-                                                 args,
-                                                 first_arg,
-                                                 result);
-  }
-
-  ObjPtr<mirror::MethodType> handle_type(method_handle->GetMethodType());
-  if (UNLIKELY(!callsite_type->IsExactMatch(handle_type.Ptr()))) {
-    ThrowWrongMethodTypeException(handle_type.Ptr(), callsite_type.Get());
-    return false;
-  }
-
+  StackHandleScope<1> hs(self);
   const mirror::MethodHandle::Kind handle_kind = method_handle->GetHandleKind();
+  Handle<mirror::MethodType> method_handle_type(hs.NewHandle(method_handle->GetMethodType()));
   if (IsFieldAccess(handle_kind)) {
     const bool do_convert = false;
     return DoInvokePolymorphicFieldAccess<is_range, do_convert>(
@@ -1046,13 +1016,68 @@
         result);
   }
 
-  return DoInvokePolymorphicUnchecked<is_range>(self,
+  // Slow-path check.
+  if (IsInvokeTransform(handle_kind) || IsCallerTransformer(callsite_type)) {
+    return DoInvokePolymorphicMethod<is_range>(self,
+                                               shadow_frame,
+                                               method_handle,
+                                               callsite_type,
+                                               args,
+                                               first_arg,
+                                               result);
+  }
+
+  // On the fast path. This is equivalent to DoCallPolymorphic without the conversion paths.
+  ArtMethod* target_method = method_handle->GetTargetMethod();
+  uint32_t receiver_reg = is_range ? first_arg : args[0];
+  ArtMethod* called_method = RefineTargetMethod(self,
                                                 shadow_frame,
-                                                method_handle,
+                                                handle_kind,
+                                                method_handle_type,
                                                 callsite_type,
-                                                args,
-                                                first_arg,
-                                                result);
+                                                receiver_reg,
+                                                target_method);
+  if (called_method == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    return false;
+  }
+
+  // Compute method information.
+  const DexFile::CodeItem* code_item = called_method->GetCodeItem();
+  uint16_t num_regs;
+  size_t num_input_regs;
+  size_t first_dest_reg;
+  if (LIKELY(code_item != nullptr)) {
+    num_regs = code_item->registers_size_;
+    first_dest_reg = num_regs - code_item->ins_size_;
+    num_input_regs = code_item->ins_size_;
+    // Parameter registers go at the end of the shadow frame.
+    DCHECK_NE(first_dest_reg, static_cast<size_t>(-1));
+  } else {
+    // No local regs for proxy and native methods.
+    DCHECK(called_method->IsNative() || called_method->IsProxyMethod());
+    num_regs = num_input_regs = GetInsForProxyOrNativeMethod(called_method);
+    first_dest_reg = 0;
+  }
+
+  // Allocate shadow frame on the stack.
+  const char* old_cause = self->StartAssertNoThreadSuspension("DoInvokePolymorphicExact");
+  ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
+      CREATE_SHADOW_FRAME(num_regs, &shadow_frame, called_method, /* dex pc */ 0);
+  ShadowFrame* new_shadow_frame = shadow_frame_unique_ptr.get();
+  CopyArgumentsFromCallerFrame<is_range>(shadow_frame,
+                                         new_shadow_frame,
+                                         args,
+                                         first_arg,
+                                         first_dest_reg,
+                                         num_input_regs);
+  self->EndAssertNoThreadSuspension(old_cause);
+
+  PerformCall(self, code_item, shadow_frame.GetMethod(), first_dest_reg, new_shadow_frame, result);
+  if (self->IsExceptionPending()) {
+    return false;
+  }
+  return true;
 }
 
 }  // namespace
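
For concreteness, the shadow-frame layout the fast path computes (register counts invented):

  // Suppose code_item->registers_size_ = 6 and code_item->ins_size_ = 2,
  // e.g. an instance method taking one int (receiver + int).
  //   num_regs       = 6
  //   num_input_regs = 2
  //   first_dest_reg = 6 - 2 = 4
  // CopyArgumentsFromCallerFrame copies the caller's two argument vregs into
  // vregs 4 and 5 of new_shadow_frame; vregs 0..3 are the callee's locals.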
@@ -1067,7 +1092,35 @@
                          uint32_t first_arg,
                          JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::MethodType> method_handle_type = method_handle->GetMethodType();
   if (IsMethodHandleInvokeExact(invoke_method)) {
+    // We need to check the nominal type of the handle in addition to the
+    // real type. The "nominal" type is present when MethodHandle.asType is
+    // called on any handle, and results in the declared type of the handle
+    // changing.
+    ObjPtr<mirror::MethodType> nominal_type(method_handle->GetNominalType());
+    if (UNLIKELY(nominal_type != nullptr)) {
+      if (UNLIKELY(!callsite_type->IsExactMatch(nominal_type.Ptr()))) {
+        ThrowWrongMethodTypeException(nominal_type.Ptr(), callsite_type.Get());
+        return false;
+      }
+
+      if (LIKELY(!nominal_type->IsExactMatch(method_handle_type.Ptr()))) {
+        // Different nominal type means we have to treat as non-exact.
+        return DoInvokePolymorphicNonExact<is_range>(self,
+                                                     shadow_frame,
+                                                     method_handle,
+                                                     callsite_type,
+                                                     args,
+                                                     first_arg,
+                                                     result);
+      }
+    }
+
+    if (!callsite_type->IsExactMatch(method_handle_type.Ptr())) {
+      ThrowWrongMethodTypeException(method_handle_type.Ptr(), callsite_type.Get());
+      return false;
+    }
     return DoInvokePolymorphicExact<is_range>(self,
                                               shadow_frame,
                                               method_handle,
@@ -1076,6 +1129,16 @@
                                               first_arg,
                                               result);
   } else {
+    if (UNLIKELY(callsite_type->IsExactMatch(method_handle_type.Ptr()))) {
+      // A non-exact invoke that can be invoked exactly.
+      return DoInvokePolymorphicExact<is_range>(self,
+                                                shadow_frame,
+                                                method_handle,
+                                                callsite_type,
+                                                args,
+                                                first_arg,
+                                                result);
+    }
     return DoInvokePolymorphicNonExact<is_range>(self,
                                                  shadow_frame,
                                                  method_handle,
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 8e591e4..811f1ea 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -187,10 +187,12 @@
   uint32_t rb_state = lw.ReadBarrierState();
   return rb_state;
 #else
-  // mips/mips64
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
-  UNUSED(fake_address_dependency);
+  // MIPS32/MIPS64: use a memory barrier to prevent load-load reordering.
+  LockWord lw = GetLockWord(false);
+  *fake_address_dependency = 0;
+  std::atomic_thread_fence(std::memory_order_acquire);
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
 #endif
 }
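
The MIPS branch relies on the standard relaxed-load-then-acquire-fence idiom. A self-contained sketch of the same ordering guarantee (illustrative only, not ART code):

  #include <atomic>
  #include <cstdint>

  // Loads issued after the acquire fence cannot be reordered before the
  // lock-word load, mirroring what the fake address dependency provides on
  // the architectures that use it.
  uint32_t ReadStateThenPayload(const std::atomic<uint32_t>& lock_word,
                                const std::atomic<uint32_t>& payload,
                                uint32_t* payload_out) {
    uint32_t lw = lock_word.load(std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_acquire);
    *payload_out = payload.load(std::memory_order_relaxed);  // Ordered after lw.
    return lw;
  }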
 
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
index 7460d62..cbc5024 100644
--- a/runtime/native_stack_dump.cc
+++ b/runtime/native_stack_dump.cc
@@ -105,7 +105,7 @@
   if (pid == -1) {
     close(caller_to_addr2line[0]);
     close(caller_to_addr2line[1]);
-    close(addr2line_to_caller[1]);
+    close(addr2line_to_caller[0]);
     close(addr2line_to_caller[1]);
     return nullptr;
   }
diff --git a/runtime/oat.h b/runtime/oat.h
index 190d533..faa0129 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '1', '1', '6', '\0' };  // Add method infos.
+  static constexpr uint8_t kOatVersion[] = { '1', '1', '7', '\0' };  // Read barriers on MIPS.
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index 448e1ed..39e603e 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -1513,8 +1513,8 @@
 ArtJvmTiEnv::ArtJvmTiEnv(art::JavaVMExt* runtime, EventHandler* event_handler)
     : art_vm(runtime),
       local_data(nullptr),
-      capabilities(),
-      object_tag_table(new ObjectTagTable(event_handler)) {
+      capabilities() {
+  object_tag_table = std::unique_ptr<ObjectTagTable>(new ObjectTagTable(event_handler, this));
   functions = &gJvmtiInterface;
 }
 
diff --git a/runtime/openjdkjvmti/events-inl.h b/runtime/openjdkjvmti/events-inl.h
index d88805e..1ddbb86 100644
--- a/runtime/openjdkjvmti/events-inl.h
+++ b/runtime/openjdkjvmti/events-inl.h
@@ -169,15 +169,19 @@
 // exactly the argument types of the corresponding Jvmti kEvent function pointer.
 
 template <ArtJvmtiEvent kEvent, typename ...Args>
-inline void EventHandler::DispatchEvent(art::Thread* thread,
-                                        Args... args) const {
-  using FnType = void(jvmtiEnv*, Args...);
+inline void EventHandler::DispatchEvent(art::Thread* thread, Args... args) const {
   for (ArtJvmTiEnv* env : envs) {
-    if (ShouldDispatch<kEvent>(env, thread)) {
-      FnType* callback = impl::GetCallback<kEvent>(env);
-      if (callback != nullptr) {
-        (*callback)(env, args...);
-      }
+    DispatchEvent<kEvent, Args...>(env, thread, args...);
+  }
+}
+
+template <ArtJvmtiEvent kEvent, typename ...Args>
+inline void EventHandler::DispatchEvent(ArtJvmTiEnv* env, art::Thread* thread, Args... args) const {
+  using FnType = void(jvmtiEnv*, Args...);
+  if (ShouldDispatch<kEvent>(env, thread)) {
+    FnType* callback = impl::GetCallback<kEvent>(env);
+    if (callback != nullptr) {
+      (*callback)(env, args...);
     }
   }
 }
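
With this refactoring, broadcast dispatch is a thin loop over the per-environment overload. The two call shapes, mirroring ti_phase.cc and object_tagging.cc elsewhere in this patch (call sites are illustrative):

  // Broadcast to every registered environment:
  event_handler->DispatchEvent<ArtJvmtiEvent::kVmInit>(nullptr, jni_env, jthread_peer);
  // Deliver to a single environment, e.g. the one owning a tag table:
  event_handler->DispatchEvent<ArtJvmtiEvent::kObjectFree>(env, nullptr, tag);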
diff --git a/runtime/openjdkjvmti/events.h b/runtime/openjdkjvmti/events.h
index 4e20d17..ae8bf0f 100644
--- a/runtime/openjdkjvmti/events.h
+++ b/runtime/openjdkjvmti/events.h
@@ -156,9 +156,14 @@
                       ArtJvmtiEvent event,
                       jvmtiEventMode mode);
 
+  // Dispatch event to all registered environments.
   template <ArtJvmtiEvent kEvent, typename ...Args>
   ALWAYS_INLINE
   inline void DispatchEvent(art::Thread* thread, Args... args) const;
+  // Dispatch event to the given environment, only.
+  template <ArtJvmtiEvent kEvent, typename ...Args>
+  ALWAYS_INLINE
+  inline void DispatchEvent(ArtJvmTiEnv* env, art::Thread* thread, Args... args) const;
 
   // Tell the event handler that capabilities were added/lost so it can adjust the sent events. If
   // caps_added is true then caps is all the newly set capabilities of the jvmtiEnv. If it is false
diff --git a/runtime/openjdkjvmti/jvmti_weak_table.h b/runtime/openjdkjvmti/jvmti_weak_table.h
index ae36122..eeea75a 100644
--- a/runtime/openjdkjvmti/jvmti_weak_table.h
+++ b/runtime/openjdkjvmti/jvmti_weak_table.h
@@ -53,7 +53,7 @@
 class JvmtiWeakTable : public art::gc::SystemWeakHolder {
  public:
   JvmtiWeakTable()
-      : art::gc::SystemWeakHolder(kTaggingLockLevel),
+      : art::gc::SystemWeakHolder(art::kTaggingLockLevel),
         update_since_last_sweep_(false) {
   }
 
@@ -200,10 +200,6 @@
     }
   };
 
-  // The tag table is used when visiting roots. So it needs to have a low lock level.
-  static constexpr art::LockLevel kTaggingLockLevel =
-      static_cast<art::LockLevel>(art::LockLevel::kAbortLock + 1);
-
   std::unordered_map<art::GcRoot<art::mirror::Object>,
                      T,
                      HashGcRoot,
diff --git a/runtime/openjdkjvmti/object_tagging.cc b/runtime/openjdkjvmti/object_tagging.cc
index 4215588..dcdd3ed 100644
--- a/runtime/openjdkjvmti/object_tagging.cc
+++ b/runtime/openjdkjvmti/object_tagging.cc
@@ -33,6 +33,7 @@
 
 #include <limits>
 
+#include "art_jvmti.h"
 #include "events-inl.h"
 #include "jvmti_weak_table-inl.h"
 
@@ -60,7 +61,7 @@
   return event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kObjectFree);
 }
 void ObjectTagTable::HandleNullSweep(jlong tag) {
-  event_handler_->DispatchEvent<ArtJvmtiEvent::kObjectFree>(nullptr, tag);
+  event_handler_->DispatchEvent<ArtJvmtiEvent::kObjectFree>(jvmti_env_, nullptr, tag);
 }
 
 }  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/object_tagging.h b/runtime/openjdkjvmti/object_tagging.h
index b5a601c..ca84e44 100644
--- a/runtime/openjdkjvmti/object_tagging.h
+++ b/runtime/openjdkjvmti/object_tagging.h
@@ -42,12 +42,13 @@
 
 namespace openjdkjvmti {
 
+struct ArtJvmTiEnv;
 class EventHandler;
 
 class ObjectTagTable FINAL : public JvmtiWeakTable<jlong> {
  public:
-  explicit ObjectTagTable(EventHandler* event_handler) : event_handler_(event_handler) {
-  }
+  ObjectTagTable(EventHandler* event_handler, ArtJvmTiEnv* env)
+      : event_handler_(event_handler), jvmti_env_(env) {}
 
   bool Set(art::mirror::Object* obj, jlong tag) OVERRIDE
       REQUIRES_SHARED(art::Locks::mutator_lock_)
@@ -77,6 +78,7 @@
 
  private:
   EventHandler* event_handler_;
+  ArtJvmTiEnv* jvmti_env_;
 };
 
 }  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/ti_phase.cc b/runtime/openjdkjvmti/ti_phase.cc
index e494cb6..941cf7b 100644
--- a/runtime/openjdkjvmti/ti_phase.cc
+++ b/runtime/openjdkjvmti/ti_phase.cc
@@ -40,6 +40,7 @@
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
 #include "thread_list.h"
+#include "ti_thread.h"
 
 namespace openjdkjvmti {
 
@@ -69,6 +70,7 @@
         break;
       case RuntimePhase::kInit:
         {
+          ThreadUtil::CacheData();
           ScopedLocalRef<jthread> thread(GetJniEnv(), GetCurrentJThread());
           art::ScopedThreadSuspension sts(art::Thread::Current(), art::ThreadState::kNative);
           event_handler->DispatchEvent<ArtJvmtiEvent::kVmInit>(nullptr, GetJniEnv(), thread.get());
@@ -105,6 +107,16 @@
   return ERR(NONE);
 }
 
+bool PhaseUtil::IsLivePhase() {
+  jvmtiPhase now = PhaseUtil::current_phase_;
+  DCHECK(now == JVMTI_PHASE_ONLOAD ||
+         now == JVMTI_PHASE_PRIMORDIAL ||
+         now == JVMTI_PHASE_START ||
+         now == JVMTI_PHASE_LIVE ||
+         now == JVMTI_PHASE_DEAD);
+  return now == JVMTI_PHASE_LIVE;
+}
+
 void PhaseUtil::SetToOnLoad() {
   DCHECK_EQ(0u, static_cast<size_t>(PhaseUtil::current_phase_));
   PhaseUtil::current_phase_ = JVMTI_PHASE_ONLOAD;
@@ -117,6 +129,7 @@
 
 void PhaseUtil::SetToLive() {
   DCHECK_EQ(static_cast<size_t>(0), static_cast<size_t>(PhaseUtil::current_phase_));
+  ThreadUtil::CacheData();
   PhaseUtil::current_phase_ = JVMTI_PHASE_LIVE;
 }
 
diff --git a/runtime/openjdkjvmti/ti_phase.h b/runtime/openjdkjvmti/ti_phase.h
index 851fc27..a2c0d11 100644
--- a/runtime/openjdkjvmti/ti_phase.h
+++ b/runtime/openjdkjvmti/ti_phase.h
@@ -42,6 +42,7 @@
 class PhaseUtil {
  public:
   static jvmtiError GetPhase(jvmtiEnv* env, jvmtiPhase* phase_ptr);
+  static bool IsLivePhase();
 
   static void Register(EventHandler* event_handler);
   static void Unregister();
diff --git a/runtime/openjdkjvmti/ti_thread.cc b/runtime/openjdkjvmti/ti_thread.cc
index 788ac30..e5ff090 100644
--- a/runtime/openjdkjvmti/ti_thread.cc
+++ b/runtime/openjdkjvmti/ti_thread.cc
@@ -44,6 +44,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
 #include "obj_ptr.h"
+#include "ti_phase.h"
 #include "runtime.h"
 #include "runtime_callbacks.h"
 #include "ScopedLocalRef.h"
@@ -54,6 +55,8 @@
 
 namespace openjdkjvmti {
 
+art::ArtField* ThreadUtil::context_class_loader_ = nullptr;
+
 struct ThreadCallback : public art::ThreadLifecycleCallback, public art::RuntimePhaseCallback {
   jthread GetThreadObject(art::Thread* self) REQUIRES_SHARED(art::Locks::mutator_lock_) {
     if (self->GetPeer() == nullptr) {
@@ -121,6 +124,16 @@
   runtime->GetRuntimeCallbacks()->AddRuntimePhaseCallback(&gThreadCallback);
 }
 
+void ThreadUtil::CacheData() {
+  art::ScopedObjectAccess soa(art::Thread::Current());
+  art::ObjPtr<art::mirror::Class> thread_class =
+      soa.Decode<art::mirror::Class>(art::WellKnownClasses::java_lang_Thread);
+  CHECK(thread_class != nullptr);
+  context_class_loader_ = thread_class->FindDeclaredInstanceField("contextClassLoader",
+                                                                  "Ljava/lang/ClassLoader;");
+  CHECK(context_class_loader_ != nullptr);
+}
+
 void ThreadUtil::Unregister() {
   art::ScopedThreadStateChange stsc(art::Thread::Current(),
                                     art::ThreadState::kWaitingForDebuggerToAttach);
@@ -146,22 +159,6 @@
   return ERR(NONE);
 }
 
-// Read the context classloader from a Java thread object. This is a lazy implementation
-// that assumes GetThreadInfo isn't called too often. If we instead cache the ArtField,
-// we will have to add synchronization as this can't be cached on startup (which is
-// potentially runtime startup).
-static art::ObjPtr<art::mirror::Object> GetContextClassLoader(art::ObjPtr<art::mirror::Object> peer)
-    REQUIRES_SHARED(art::Locks::mutator_lock_) {
-  if (peer == nullptr) {
-    return nullptr;
-  }
-  art::ObjPtr<art::mirror::Class> klass = peer->GetClass();
-  art::ArtField* cc_field = klass->FindDeclaredInstanceField("contextClassLoader",
-                                                             "Ljava/lang/ClassLoader;");
-  CHECK(cc_field != nullptr);
-  return cc_field->GetObject(peer);
-}
-
 // Get the native thread. The spec says a null object denotes the current thread.
 static art::Thread* GetNativeThread(jthread thread,
                                     const art::ScopedObjectAccessAlreadyRunnable& soa)
@@ -178,6 +175,9 @@
   if (info_ptr == nullptr) {
     return ERR(NULL_POINTER);
   }
+  if (!PhaseUtil::IsLivePhase()) {
+    return JVMTI_ERROR_WRONG_PHASE;
+  }
 
   art::ScopedObjectAccess soa(art::Thread::Current());
 
@@ -217,7 +217,10 @@
     }
 
     // Context classloader.
-    art::ObjPtr<art::mirror::Object> ccl = GetContextClassLoader(peer);
+    DCHECK(context_class_loader_ != nullptr);
+    art::ObjPtr<art::mirror::Object> ccl = peer != nullptr
+        ? context_class_loader_->GetObject(peer)
+        : nullptr;
     info_ptr->context_class_loader = ccl == nullptr
                                          ? nullptr
                                          : soa.AddLocalReference<jobject>(ccl);
@@ -272,7 +275,10 @@
     }
 
     // Context classloader.
-    art::ObjPtr<art::mirror::Object> ccl = GetContextClassLoader(peer);
+    DCHECK(context_class_loader_ != nullptr);
+    art::ObjPtr<art::mirror::Object> ccl = peer != nullptr
+        ? context_class_loader_->GetObject(peer)
+        : nullptr;
     info_ptr->context_class_loader = ccl == nullptr
                                          ? nullptr
                                          : soa.AddLocalReference<jobject>(ccl);
diff --git a/runtime/openjdkjvmti/ti_thread.h b/runtime/openjdkjvmti/ti_thread.h
index f6f93ee..c7f75d8 100644
--- a/runtime/openjdkjvmti/ti_thread.h
+++ b/runtime/openjdkjvmti/ti_thread.h
@@ -35,6 +35,10 @@
 #include "jni.h"
 #include "jvmti.h"
 
+namespace art {
+class ArtField;
+}
+
 namespace openjdkjvmti {
 
 class EventHandler;
@@ -44,6 +48,9 @@
   static void Register(EventHandler* event_handler);
   static void Unregister();
 
+  // To be called when it is safe to cache data.
+  static void CacheData();
+
   static jvmtiError GetAllThreads(jvmtiEnv* env, jint* threads_count_ptr, jthread** threads_ptr);
 
   static jvmtiError GetCurrentThread(jvmtiEnv* env, jthread* thread_ptr);
@@ -60,6 +67,9 @@
                                    jvmtiStartFunction proc,
                                    const void* arg,
                                    jint priority);
+
+ private:
+  static art::ArtField* context_class_loader_;
 };
 
 }  // namespace openjdkjvmti
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 4d787db..0784e59 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -96,7 +96,7 @@
       //     .WithType<std::vector<ti::Agent>>().AppendValues()
       //     .IntoKey(M::AgentLib)
       .Define("-agentpath:_")
-          .WithType<std::vector<ti::Agent>>().AppendValues()
+          .WithType<std::list<ti::Agent>>().AppendValues()
           .IntoKey(M::AgentPath)
       .Define("-Xms_")
           .WithType<MemoryKiB>()
diff --git a/runtime/runtime.h b/runtime/runtime.h
index d244a9b..92feabb 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -733,7 +733,7 @@
   std::string class_path_string_;
   std::vector<std::string> properties_;
 
-  std::vector<ti::Agent> agents_;
+  std::list<ti::Agent> agents_;
   std::vector<Plugin> plugins_;
 
   // The default stack size for managed threads created by the runtime.
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index e68a1b2..16190cd 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -120,8 +120,8 @@
 RUNTIME_OPTIONS_KEY (std::string,         CpuAbiList)
 RUNTIME_OPTIONS_KEY (std::string,         Fingerprint)
 RUNTIME_OPTIONS_KEY (ExperimentalFlags,   Experimental,     ExperimentalFlags::kNone) // -Xexperimental:{...}
-RUNTIME_OPTIONS_KEY (std::vector<ti::Agent>,         AgentLib)  // -agentlib:<libname>=<options>
-RUNTIME_OPTIONS_KEY (std::vector<ti::Agent>,         AgentPath)  // -agentpath:<libname>=<options>
+RUNTIME_OPTIONS_KEY (std::list<ti::Agent>,         AgentLib)  // -agentlib:<libname>=<options>
+RUNTIME_OPTIONS_KEY (std::list<ti::Agent>,         AgentPath)  // -agentpath:<libname>=<options>
 RUNTIME_OPTIONS_KEY (std::vector<Plugin>,            Plugins)  // -Xplugin:<library>
 
 // Not parse-able from command line, but can be provided explicitly.
diff --git a/test/643-checker-bogus-ic/expected.txt b/test/643-checker-bogus-ic/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/643-checker-bogus-ic/expected.txt
diff --git a/test/643-checker-bogus-ic/info.txt b/test/643-checker-bogus-ic/info.txt
new file mode 100644
index 0000000..d5dfff4
--- /dev/null
+++ b/test/643-checker-bogus-ic/info.txt
@@ -0,0 +1 @@
+Verify the compiler can handle a bogus inline cache in a profile.
diff --git a/test/643-checker-bogus-ic/profile b/test/643-checker-bogus-ic/profile
new file mode 100644
index 0000000..cbf7796
--- /dev/null
+++ b/test/643-checker-bogus-ic/profile
@@ -0,0 +1,2 @@
+LMain;->inlineMonomorphic(LMain;)I+LUnrelated;
+LMain;->inlinePolymorphic(LMain;)I+LUnrelated;,LMain;
diff --git a/test/643-checker-bogus-ic/run b/test/643-checker-bogus-ic/run
new file mode 100644
index 0000000..146e180
--- /dev/null
+++ b/test/643-checker-bogus-ic/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+exec ${RUN} "$@" --profile -Xcompiler-option --compiler-filter=speed-profile
diff --git a/test/643-checker-bogus-ic/src/Main.java b/test/643-checker-bogus-ic/src/Main.java
new file mode 100644
index 0000000..0aa8477
--- /dev/null
+++ b/test/643-checker-bogus-ic/src/Main.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Unrelated {
+}
+
+public class Main {
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Main.getValue
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) inliner (after)
+  /// CHECK:   InvokeVirtual method_name:Main.getValue
+
+  public static int inlineMonomorphic(Main a) {
+    return a.getValue();
+  }
+
+  /// CHECK-START: int Main.inlinePolymorphic(Main) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Main.getValue
+
+  /// CHECK-START: int Main.inlinePolymorphic(Main) inliner (after)
+  /// CHECK:   InvokeVirtual method_name:Main.getValue
+  public static int inlinePolymorphic(Main a) {
+    return a.getValue();
+  }
+
+  public int getValue() {
+    return 42;
+  }
+
+  public static void main(String[] args) {
+    inlineMonomorphic(new Main());
+  }
+
+}
diff --git a/test/900-hello-plugin/expected.txt b/test/900-hello-plugin/expected.txt
index 43db31c..c160f65 100644
--- a/test/900-hello-plugin/expected.txt
+++ b/test/900-hello-plugin/expected.txt
@@ -3,6 +3,8 @@
 GetEnvHandler called in test 900
 GetEnvHandler called with version 0x900fffff
 GetEnv returned '900' environment!
+Agent_OnLoad called with options "test_900_round_2"
 Hello, world!
 Agent_OnUnload called
+Agent_OnUnload called
 ArtPlugin_Deinitialize called in test 900
diff --git a/test/900-hello-plugin/load_unload.cc b/test/900-hello-plugin/load_unload.cc
index a38cc3d..290997a 100644
--- a/test/900-hello-plugin/load_unload.cc
+++ b/test/900-hello-plugin/load_unload.cc
@@ -52,6 +52,9 @@
                                                char* options,
                                                void* reserved ATTRIBUTE_UNUSED) {
   printf("Agent_OnLoad called with options \"%s\"\n", options);
+  if (strcmp("test_900_round_2", options) == 0) {
+    return 0;
+  }
   uintptr_t env = 0;
   jint res = vm->GetEnv(reinterpret_cast<void**>(&env), TEST_900_ENV_VERSION_NUMBER);
   if (res != JNI_OK) {
diff --git a/test/900-hello-plugin/run b/test/900-hello-plugin/run
index 50835f8..c633f6d 100755
--- a/test/900-hello-plugin/run
+++ b/test/900-hello-plugin/run
@@ -19,4 +19,5 @@
   plugin=libartagent.so
 fi
 ./default-run "$@" --runtime-option -agentpath:${plugin}=test_900 \
+                   --runtime-option -agentpath:${plugin}=test_900_round_2 \
                    --android-runtime-option -Xplugin:${plugin}
diff --git a/test/905-object-free/expected.txt b/test/905-object-free/expected.txt
index 436ca11..c226df7 100644
--- a/test/905-object-free/expected.txt
+++ b/test/905-object-free/expected.txt
@@ -10,3 +10,4 @@
 ---
 []
 ---
+Free counts 100000 100000
diff --git a/test/905-object-free/src/Main.java b/test/905-object-free/src/Main.java
index e41e378..0d57629 100644
--- a/test/905-object-free/src/Main.java
+++ b/test/905-object-free/src/Main.java
@@ -33,6 +33,9 @@
 
     enableFreeTracking(false);
     run(l);
+
+    enableFreeTracking(true);
+    stress();
   }
 
   private static void run(ArrayList<Object> l) {
@@ -62,6 +65,30 @@
     System.out.println("---");
   }
 
+  private static void stressAllocate(int i) {
+    Object obj = new Object();
+    setTag(obj, i);
+    setTag2(obj, i + 1);
+  }
+
+  private static void stress() {
+    getCollectedTags(0);
+    getCollectedTags(1);
+    // Allocate objects.
+    for (int i = 1; i <= 100000; ++i) {
+      stressAllocate(i);
+    }
+    Runtime.getRuntime().gc();
+    long[] freedTags1 = getCollectedTags(0);
+    long[] freedTags2 = getCollectedTags(1);
+    System.out.println("Free counts " + freedTags1.length + " " + freedTags2.length);
+    for (int i = 0; i < freedTags1.length; ++i) {
+      if (freedTags1[i] + 1 != freedTags2[i]) {
+        System.out.println("Mismatched tags " + freedTags1[i] + " " + freedTags2[i]);
+      }
+    }
+  }
+
   private static void allocate(ArrayList<Object> l, long tag) {
     Object obj = new Object();
     l.add(obj);
@@ -69,7 +96,7 @@
   }
 
   private static void getAndPrintTags() {
-    long[] freedTags = getCollectedTags();
+    long[] freedTags = getCollectedTags(0);
     Arrays.sort(freedTags);
     System.out.println(Arrays.toString(freedTags));
   }
@@ -77,5 +104,6 @@
   private static native void setupObjectFreeCallback();
   private static native void enableFreeTracking(boolean enable);
   private static native void setTag(Object o, long tag);
-  private static native long[] getCollectedTags();
+  private static native long[] getCollectedTags(int index);
+  private static native void setTag2(Object o, long tag);
 }
diff --git a/test/905-object-free/tracking_free.cc b/test/905-object-free/tracking_free.cc
index 5eed472..c489309 100644
--- a/test/905-object-free/tracking_free.cc
+++ b/test/905-object-free/tracking_free.cc
@@ -31,25 +31,42 @@
 namespace art {
 namespace Test905ObjectFree {
 
-static std::vector<jlong> collected_tags;
+static std::vector<jlong> collected_tags1;
+static std::vector<jlong> collected_tags2;
 
-static void JNICALL ObjectFree(jvmtiEnv* ti_env ATTRIBUTE_UNUSED, jlong tag) {
-  collected_tags.push_back(tag);
+jvmtiEnv* jvmti_env2;
+
+static void JNICALL ObjectFree1(jvmtiEnv* ti_env, jlong tag) {
+  CHECK_EQ(ti_env, jvmti_env);
+  collected_tags1.push_back(tag);
+}
+
+static void JNICALL ObjectFree2(jvmtiEnv* ti_env, jlong tag) {
+  CHECK_EQ(ti_env, jvmti_env2);
+  collected_tags2.push_back(tag);
+}
+
+static void setupObjectFreeCallback(jvmtiEnv* env, jvmtiEventObjectFree callback) {
+  jvmtiEventCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiEventCallbacks));
+  callbacks.ObjectFree = callback;
+  jvmtiError ret = env->SetEventCallbacks(&callbacks, sizeof(callbacks));
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    env->GetErrorName(ret, &err);
+    printf("Error setting callbacks: %s\n", err);
+    env->Deallocate(reinterpret_cast<unsigned char*>(err));
+  }
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_setupObjectFreeCallback(
-    JNIEnv* env ATTRIBUTE_UNUSED, jclass klass ATTRIBUTE_UNUSED) {
-  jvmtiEventCallbacks callbacks;
-  memset(&callbacks, 0, sizeof(jvmtiEventCallbacks));
-  callbacks.ObjectFree = ObjectFree;
-
-  jvmtiError ret = jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
-  if (ret != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(ret, &err);
-    printf("Error setting callbacks: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
-  }
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) {
+  setupObjectFreeCallback(jvmti_env, ObjectFree1);
+  JavaVM* jvm = nullptr;
+  env->GetJavaVM(&jvm);
+  CHECK_EQ(jvm->GetEnv(reinterpret_cast<void**>(&jvmti_env2), JVMTI_VERSION_1_2), 0);
+  SetAllCapabilities(jvmti_env2);
+  setupObjectFreeCallback(jvmti_env2, ObjectFree2);
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_enableFreeTracking(JNIEnv* env ATTRIBUTE_UNUSED,
@@ -65,20 +82,40 @@
     printf("Error enabling/disabling object-free callbacks: %s\n", err);
     jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
   }
+  ret = jvmti_env2->SetEventNotificationMode(
+      enable ? JVMTI_ENABLE : JVMTI_DISABLE,
+      JVMTI_EVENT_OBJECT_FREE,
+      nullptr);
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env2->GetErrorName(ret, &err);
+    printf("Error enabling/disabling object-free callbacks: %s\n", err);
+    jvmti_env2->Deallocate(reinterpret_cast<unsigned char*>(err));
+  }
 }
 
 extern "C" JNIEXPORT jlongArray JNICALL Java_Main_getCollectedTags(JNIEnv* env,
-                                                                   jclass klass ATTRIBUTE_UNUSED) {
-  jlongArray ret = env->NewLongArray(collected_tags.size());
+                                                                   jclass klass ATTRIBUTE_UNUSED,
+                                                                   jint index) {
+  std::vector<jlong>& tags = (index == 0) ? collected_tags1 : collected_tags2;
+  jlongArray ret = env->NewLongArray(tags.size());
   if (ret == nullptr) {
     return ret;
   }
 
-  env->SetLongArrayRegion(ret, 0, collected_tags.size(), collected_tags.data());
-  collected_tags.clear();
+  env->SetLongArrayRegion(ret, 0, tags.size(), tags.data());
+  tags.clear();
 
   return ret;
 }
 
+extern "C" JNIEXPORT void JNICALL Java_Main_setTag2(JNIEnv* env,
+                                                    jclass klass ATTRIBUTE_UNUSED,
+                                                    jobject obj,
+                                                    jlong tag) {
+  jvmtiError ret = jvmti_env2->SetTag(obj, tag);
+  JvmtiErrorToException(env, ret);
+}
+
 }  // namespace Test905ObjectFree
 }  // namespace art
diff --git a/test/924-threads/expected.txt b/test/924-threads/expected.txt
index 67d20eb..4c0f4ea 100644
--- a/test/924-threads/expected.txt
+++ b/test/924-threads/expected.txt
@@ -19,6 +19,11 @@
 true
 java.lang.ThreadGroup[name=main,maxpri=10]
 class dalvik.system.PathClassLoader
+Subclass
+5
+false
+java.lang.ThreadGroup[name=main,maxpri=10]
+class dalvik.system.PathClassLoader
 5
 5
 0 = NEW
diff --git a/test/924-threads/src/Main.java b/test/924-threads/src/Main.java
index 716f59e..7328560 100644
--- a/test/924-threads/src/Main.java
+++ b/test/924-threads/src/Main.java
@@ -52,6 +52,11 @@
     // Thread has died, check that we can still get info.
     printThreadInfo(t3);
 
+    // Try a subclass of thread.
+    Thread t4 = new Thread("Subclass") {
+    };
+    printThreadInfo(t4);
+
     doStateTests();
 
     doAllThreadsTests();
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index cc015b0..187b383 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -208,7 +208,7 @@
 endef
 
 TARGET_TYPES := host target
-COMPILER_TYPES := jit interpreter optimizing regalloc_gc jit interp-ac
+COMPILER_TYPES := jit interpreter optimizing regalloc_gc interp-ac speed-profile
 IMAGE_TYPES := picimage no-image multipicimage
 ALL_ADDRESS_SIZES := 64 32
 
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 808e58a..f1b6132 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -64,6 +64,7 @@
 APP_IMAGE="y"
 VDEX_FILTER=""
 PROFILE="n"
+RANDOM_PROFILE="n"
 
 # if "y", run 'sync' before dalvikvm to make sure all files from
 # build step (e.g. dex2oat) were finished writing.
@@ -273,6 +274,9 @@
     elif [ "x$1" = "x--profile" ]; then
         PROFILE="y"
         shift
+    elif [ "x$1" = "x--random-profile" ]; then
+        RANDOM_PROFILE="y"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         exit 1
@@ -506,17 +510,23 @@
 strip_cmdline="true"
 sync_cmdline="true"
 
-if [ "$PROFILE" = "y" ]; then
+# PROFILE takes precedence over RANDOM_PROFILE, since PROFILE tests require a
+# specific profile to run properly.
+if [ "$PROFILE" = "y" ] || [ "$RANDOM_PROFILE" = "y" ]; then
   profman_cmdline="${ANDROID_ROOT}/bin/profman  \
     --apk=$DEX_LOCATION/$TEST_NAME.jar \
-    --dex-location=$DEX_LOCATION/$TEST_NAME.jar \
-    --create-profile-from=$DEX_LOCATION/profile \
-    --reference-profile-file=$DEX_LOCATION/$TEST_NAME.prof"
+    --dex-location=$DEX_LOCATION/$TEST_NAME.jar"
   COMPILE_FLAGS="${COMPILE_FLAGS} --profile-file=$DEX_LOCATION/$TEST_NAME.prof"
   FLAGS="${FLAGS} -Xcompiler-option --profile-file=$DEX_LOCATION/$TEST_NAME.prof"
+  if [ "$PROFILE" = "y" ]; then
+    profman_cmdline="${profman_cmdline} --create-profile-from=$DEX_LOCATION/profile \
+        --reference-profile-file=$DEX_LOCATION/$TEST_NAME.prof"
+  else
+    profman_cmdline="${profman_cmdline} --generate-test-profile=$DEX_LOCATION/$TEST_NAME.prof \
+        --generate-test-profile-seed=0"
+  fi
 fi
 
-
 if [ "$PREBUILD" = "y" ]; then
   mkdir_locations="${mkdir_locations} ${DEX_LOCATION}/oat/$ISA"
   if [ "$APP_IMAGE" = "y" ]; then
@@ -603,7 +613,7 @@
       adb shell mkdir -p $DEX_LOCATION
       adb push $TEST_NAME.jar $DEX_LOCATION
       adb push $TEST_NAME-ex.jar $DEX_LOCATION
-      if [ "$PROFILE" = "y" ]; then
+      if [ "$PROFILE" = "y" ] || [ "$RANDOM_PROFILE" = "y" ]; then
         adb push profile $DEX_LOCATION
       fi
     else
@@ -611,7 +621,7 @@
       adb shell mkdir -p $DEX_LOCATION >/dev/null 2>&1
       adb push $TEST_NAME.jar $DEX_LOCATION >/dev/null 2>&1
       adb push $TEST_NAME-ex.jar $DEX_LOCATION >/dev/null 2>&1
-      if [ "$PROFILE" = "y" ]; then
+      if [ "$PROFILE" = "y" ] || [ "$RANDOM_PROFILE" = "y" ]; then
         adb push profile $DEX_LOCATION >/dev/null 2>&1
       fi
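For the new --random-profile mode, the assembled profman command reduces to something like the following sketch (the $DEX_LOCATION and $TEST_NAME values are illustrative):

    profman --apk=/data/local/tmp/test/MyTest.jar \
            --dex-location=/data/local/tmp/test/MyTest.jar \
            --generate-test-profile=/data/local/tmp/test/MyTest.prof \
            --generate-test-profile-seed=0

The fixed seed keeps the generated profile reproducible across runs; the resulting .prof file is then handed to both dex2oat and the runtime through the --profile-file options added to COMPILE_FLAGS and FLAGS above.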
 
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 8aa0c55..a7b28de 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -259,7 +259,7 @@
                   "602-deoptimizeable"],
         "description": ["Tests that should fail when the optimizing compiler ",
                         "compiles them non-debuggable."],
-        "variant": "optimizing &  ndebuggable | regalloc_gc & ndebuggable"
+        "variant": "optimizing & ndebuggable | regalloc_gc & ndebuggable | speed-profile & ndebuggable"
     },
     {
         "tests": "596-app-images",
@@ -366,5 +366,12 @@
                   "644-checker-deopt"],
         "description": ["Disabled temporarily until a fix arrives."],
         "bug": "http://b/36371709"
+    },
+    {
+        "tests": ["629-vdex-speed",
+                  "634-vdex-duplicate",
+                  "983-source-transform-verify"],
+        "description": ["Profile driven dexlayout does not work with vdex or dex verifier."],
+        "variant": "speed-profile"
     }
 ]
diff --git a/test/run-all-tests b/test/run-all-tests
index 402c299..a0d2f23 100755
--- a/test/run-all-tests
+++ b/test/run-all-tests
@@ -155,6 +155,9 @@
     elif [ "x$1" = "x--strace" ]; then
         run_args="${run_args} --strace"
         shift
+    elif [ "x$1" = "x--random-profile" ]; then
+        run_args="${run_args} --random-profile"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         usage="yes"
diff --git a/test/run-test b/test/run-test
index a6903ff..91ffdfa 100755
--- a/test/run-test
+++ b/test/run-test
@@ -382,6 +382,9 @@
         filter=$1
         run_args="${run_args} --vdex-filter $filter"
         shift
+    elif [ "x$1" = "x--random-profile" ]; then
+        run_args="${run_args} --random-profile"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         usage="yes"
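With both scripts forwarding the flag, the new mode can be exercised by hand; a hypothetical invocation (test name chosen only for illustration):

    art/test/run-test --random-profile 001-HelloWorld

run-test passes --random-profile through run_args to run-test-jar, which then takes the --generate-test-profile branch shown earlier.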
diff --git a/test/testrunner/env.py b/test/testrunner/env.py
index e93fb3a..46244a4 100644
--- a/test/testrunner/env.py
+++ b/test/testrunner/env.py
@@ -105,6 +105,9 @@
 # Do you want to test the optimizing compiler with graph coloring register allocation?
 ART_TEST_OPTIMIZING_GRAPH_COLOR = getEnvBoolean('ART_TEST_OPTIMIZING_GRAPH_COLOR', ART_TEST_FULL)
 
+# Do you want to do run-tests with profiles?
+ART_TEST_SPEED_PROFILE = getEnvBoolean('ART_TEST_SPEED_PROFILE', ART_TEST_FULL)
+
 # Do we want to test PIC-compiled tests ("apps")?
 ART_TEST_PIC_TEST = getEnvBoolean('ART_TEST_PIC_TEST', ART_TEST_FULL)
 # Do you want tracing tests run?
diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py
index 149578d..49dc657 100755
--- a/test/testrunner/testrunner.py
+++ b/test/testrunner/testrunner.py
@@ -147,7 +147,7 @@
   VARIANT_TYPE_DICT['jni'] = {'jni', 'forcecopy', 'checkjni'}
   VARIANT_TYPE_DICT['address_sizes'] = {'64', '32'}
   VARIANT_TYPE_DICT['compiler'] = {'interp-ac', 'interpreter', 'jit', 'optimizing',
-                              'regalloc_gc'}
+                              'regalloc_gc', 'speed-profile'}
 
   for v_type in VARIANT_TYPE_DICT:
     TOTAL_VARIANTS_SET = TOTAL_VARIANTS_SET.union(VARIANT_TYPE_DICT.get(v_type))
@@ -192,6 +192,8 @@
   if env.ART_TEST_OPTIMIZING:
     COMPILER_TYPES.add('optimizing')
     OPTIMIZING_COMPILER_TYPES.add('optimizing')
+  if env.ART_TEST_SPEED_PROFILE:
+    COMPILER_TYPES.add('speed-profile')
 
   # By default we run all 'compiler' variants.
   if not COMPILER_TYPES:
@@ -199,6 +201,7 @@
     COMPILER_TYPES.add('jit')
     COMPILER_TYPES.add('interpreter')
     COMPILER_TYPES.add('interp-ac')
+    COMPILER_TYPES.add('speed-profile')
     OPTIMIZING_COMPILER_TYPES.add('optimizing')
 
   if env.ART_TEST_RUN_TEST_RELOCATE:
@@ -389,6 +392,8 @@
         options_test += ' --interpreter --verify-soft-fail'
       elif compiler == 'jit':
         options_test += ' --jit'
+      elif compiler == 'speed-profile':
+        options_test += ' --random-profile'
 
       if relocate == 'relocate':
         options_test += ' --relocate'
@@ -881,6 +886,8 @@
     IMAGE_TYPES.add('no-image')
   if options['optimizing']:
     COMPILER_TYPES.add('optimizing')
+  if options['speed_profile']:
+    COMPILER_TYPES.add('speed-profile')
   if options['trace']:
     TRACE_TYPES.add('trace')
   if options['gcstress']:
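On the testrunner side, the new variant can be requested through the environment knob defined in env.py; a minimal sketch, assuming a built host tree:

    ART_TEST_SPEED_PROFILE=true ./test/testrunner/testrunner.py --host

When the variable is set, 'speed-profile' joins COMPILER_TYPES and every matching test run gets --random-profile appended to its options.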
diff --git a/tools/art b/tools/art
index 91d6e27..f5e0860 100644
--- a/tools/art
+++ b/tools/art
@@ -16,18 +16,16 @@
 # shell dialect that should work on the host (e.g. bash), and
 # Android (e.g. mksh).
 
-function follow_links() {
-  if [ z"$BASH_SOURCE" != z ]; then
-    file="$BASH_SOURCE"
-  else
-    file="$0"
-  fi
-  while [ -h "$file" ]; do
-    # On Mac OS, readlink -f doesn't work.
-    file="$(readlink "$file")"
-  done
-  echo "$file"
-}
+# Follow all symlinks to get the program name.
+if [ z"$BASH_SOURCE" != z ]; then
+  PROG_NAME="$BASH_SOURCE"
+else
+  PROG_NAME="$0"
+fi
+while [ -h "$PROG_NAME" ]; do
+  # On Mac OS, readlink -f doesn't work.
+  PROG_NAME="$(readlink "$PROG_NAME")"
+done
 
 function find_libdir() {
   # Get the actual file, $DALVIKVM may be a symbolic link.
@@ -39,9 +37,33 @@
   fi
 }
 
+ARGS_WITH_INTERPRET_ONLY=
+function replace_compiler_filter_with_interpret_only() {
+  ARGS_WITH_INTERPRET_ONLY=("$@")
+
+  found="false"
+  ((index=0))
+  while ((index < ${#ARGS_WITH_INTERPRET_ONLY[@]})); do
+    what="${ARGS_WITH_INTERPRET_ONLY[$index]}"
+
+    case "$what" in
+      --compiler-filter=*)
+        ARGS_WITH_INTERPRET_ONLY[$index]="--compiler-filter=interpret-only"
+        found="true"
+        ;;
+    esac
+
+    ((index++))
+  done
+  if [ "$found" != "true" ]; then
+    ARGS_WITH_INTERPRET_ONLY=(-Xcompiler-option --compiler-filter=interpret-only "${ARGS_WITH_INTERPRET_ONLY[@]}")
+  fi
+}
+
 invoke_with=
 DALVIKVM=dalvikvm
 LIBART=libart.so
+JIT_PROFILE=false
 
 while true; do
   if [ "$1" = "--invoke-with" ]; then
@@ -63,6 +86,9 @@
   elif [ "$1" = "--perf-report" ]; then
     PERF="report"
     shift
+  elif [ "$1" = "--profile" ]; then
+    JIT_PROFILE="true"
+    shift
   elif expr "$1" : "--" >/dev/null 2>&1; then
     echo "unknown option: $1" 1>&2
     exit 1
@@ -71,7 +97,6 @@
   fi
 done
 
-PROG_NAME="$(follow_links)"
 PROG_DIR="$(cd "${PROG_NAME%/*}" ; pwd -P)"
 ANDROID_ROOT=$PROG_DIR/..
 LIBDIR=$(find_libdir)
@@ -92,20 +117,65 @@
   DEBUG_OPTION="-Xcompiler-option --generate-debug-info"
 fi
 
-# We use the PIC core image to work with perf.
-ANDROID_DATA=$ANDROID_DATA \
-  ANDROID_ROOT=$ANDROID_ROOT \
-  LD_LIBRARY_PATH=$LD_LIBRARY_PATH \
-  PATH=$ANDROID_ROOT/bin:$PATH \
-  LD_USE_LOAD_BIAS=1 \
-  $invoke_with $ANDROID_ROOT/bin/$DALVIKVM $lib \
-    -XXlib:$LIBART \
-    -Xnorelocate \
-    -Ximage:$ANDROID_ROOT/framework/core.art \
-    $DEBUG_OPTION \
-    "$@"
+PROFILE_OPTION=""
+EXIT_STATUS=0
+if [ "$JIT_PROFILE" = true ]; then
+  # Create the profile. The runtime expects profiles to be created before
+  # execution.
+  PROFILE_PATH="$ANDROID_DATA/primary.prof"
+  touch $PROFILE_PATH
 
-EXIT_STATUS=$?
+  # Replace the compiler filter with interpret-only so that we
+  # can capture the profile.
+  replace_compiler_filter_with_interpret_only "$@"
+
+  ANDROID_DATA=$ANDROID_DATA \
+    ANDROID_ROOT=$ANDROID_ROOT \
+    LD_LIBRARY_PATH=$LD_LIBRARY_PATH \
+    PATH=$ANDROID_ROOT/bin:$PATH \
+    LD_USE_LOAD_BIAS=1 \
+    $ANDROID_ROOT/bin/$DALVIKVM $lib \
+      -XXlib:$LIBART \
+      -Xnorelocate \
+      -Ximage:$ANDROID_ROOT/framework/core.art \
+      -Xjitsaveprofilinginfo \
+      -Xps-min-methods-to-save:0 \
+      -Xps-min-classes-to-save:0 \
+      -Xps-min-notification-before-wake:10 \
+      -Xps-profile-path:$PROFILE_PATH \
+      -Xusejit:true \
+      "${ARGS_WITH_INTERPRET_ONLY[@]}" \
+      &> "$ANDROID_DATA/profile_gen.log"
+
+  EXIT_STATUS=$?
+
+  if [ $EXIT_STATUS = 0 ]; then
+    # Wipe dalvik-cache to prepare it for the next invocation.
+    rm -rf $ANDROID_DATA/dalvik-cache/{arm,arm64,x86,x86_64}/*
+  else
+    cat "$ANDROID_DATA/profile_gen.log"
+  fi
+
+  PROFILE_OPTION="-Xcompiler-option --profile-file=$PROFILE_PATH"
+fi
+
+# Only run the second invocation if the first one finished successfully.
+if [ $EXIT_STATUS = 0 ]; then
+  ANDROID_DATA=$ANDROID_DATA \
+    ANDROID_ROOT=$ANDROID_ROOT \
+    LD_LIBRARY_PATH=$LD_LIBRARY_PATH \
+    PATH=$ANDROID_ROOT/bin:$PATH \
+    LD_USE_LOAD_BIAS=1 \
+    $invoke_with $ANDROID_ROOT/bin/$DALVIKVM $lib \
+      -XXlib:$LIBART \
+      -Xnorelocate \
+      -Ximage:$ANDROID_ROOT/framework/core.art \
+      $DEBUG_OPTION \
+      $PROFILE_OPTION \
+      "$@"
+
+  EXIT_STATUS=$?
+fi
 
 if [ z"$PERF" != z ]; then
   if [ z"$PERF" = zreport ]; then