Merge "Use GDB to dump threads in test timeouts, if available."
diff --git a/Android.mk b/Android.mk
index ab3eca4..8859d3a 100644
--- a/Android.mk
+++ b/Android.mk
@@ -241,7 +241,7 @@
 
 # Dexdump/list regression test.
 .PHONY: test-art-host-dexdump
-test-art-host-dexdump: $(addprefix $(HOST_OUT_EXECUTABLES)/, dexdump2 dexlist2)
+test-art-host-dexdump: $(addprefix $(HOST_OUT_EXECUTABLES)/, dexdump2 dexlist)
 	ANDROID_HOST_OUT=$(realpath $(HOST_OUT)) art/test/dexdump/run-all-tests
 
 # Valgrind. Currently only 32b gtests.
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 1db654a..c88d677 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -106,15 +106,14 @@
   dexdump2
 
 # The dexlist test requires an image and the dexlist utility.
-# TODO: rename into dexlist when migration completes
 ART_GTEST_dexlist_test_HOST_DEPS := \
   $(HOST_CORE_IMAGE_default_no-pic_64) \
   $(HOST_CORE_IMAGE_default_no-pic_32) \
-  $(HOST_OUT_EXECUTABLES)/dexlist2
+  $(HOST_OUT_EXECUTABLES)/dexlist
 ART_GTEST_dexlist_test_TARGET_DEPS := \
   $(TARGET_CORE_IMAGE_default_no-pic_64) \
   $(TARGET_CORE_IMAGE_default_no-pic_32) \
-  dexlist2
+  dexlist
 
 # The imgdiag test has dependencies on core.oat since it needs to load it during the test.
 # For the host, also add the installed tool (in the base size, that should suffice). For the
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index be913fe..31c3808 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -105,7 +105,8 @@
       // Don't devirt if we are in a different dex file since we can't have direct invokes in
       // another dex file unless we always put a direct / patch pointer.
       devirt_target = nullptr;
-      current_dex_cache.Assign(runtime->GetClassLinker()->FindDexCache(*it->target_dex_file_));
+      current_dex_cache.Assign(runtime->GetClassLinker()->FindDexCache(
+          soa.Self(), *it->target_dex_file_));
       CHECK(current_dex_cache.Get() != nullptr);
       DexCompilationUnit cu(
           mUnit->GetCompilationUnit(), mUnit->GetClassLoader(), mUnit->GetClassLinker(),
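
The change above is the pattern this patch repeats across the compiler: ClassLinker::FindDexCache now takes the calling thread as an explicit first argument. A minimal sketch of the assumed new declaration (simplified, not the verbatim class_linker.h text; parameter names are assumptions):

    // Sketch: the dex cache lookup now receives the calling thread instead
    // of re-deriving it internally, e.g. via Thread::Current().
    mirror::DexCache* FindDexCache(Thread* self,
                                   const DexFile& dex_file,
                                   bool allow_failure = false)
        SHARED_REQUIRES(Locks::mutator_lock_);

    // Call sites migrate accordingly, typically inside a ScopedObjectAccess:
    //   before: linker->FindDexCache(dex_file);
    //   after:  linker->FindDexCache(soa.Self(), dex_file);
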
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 83f391d..8f1987a 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -31,7 +31,7 @@
 namespace art {
 
 inline mirror::DexCache* CompilerDriver::GetDexCache(const DexCompilationUnit* mUnit) {
-  return mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile(), false);
+  return mUnit->GetClassLinker()->FindDexCache(Thread::Current(), *mUnit->GetDexFile(), false);
 }
 
 inline mirror::ClassLoader* CompilerDriver::GetClassLoader(ScopedObjectAccess& soa,
@@ -87,7 +87,7 @@
 }
 
 inline mirror::DexCache* CompilerDriver::FindDexCache(const DexFile* dex_file) {
-  return Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file, false);
+  return Runtime::Current()->GetClassLinker()->FindDexCache(Thread::Current(), *dex_file, false);
 }
 
 inline ArtField* CompilerDriver::ResolveField(
@@ -339,7 +339,8 @@
     // Sharpen a virtual call into a direct call. The method_idx is into referrer's
    // dex cache; check that this resolved method is where we expect it.
     CHECK_EQ(target_method->dex_file, mUnit->GetDexFile());
-    DCHECK_EQ(dex_cache.Get(), mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile(), false));
+    DCHECK_EQ(dex_cache.Get(), mUnit->GetClassLinker()->FindDexCache(
+        soa.Self(), *mUnit->GetDexFile(), false));
     CHECK_EQ(referrer_class->GetDexCache()->GetResolvedMethod(
         target_method->dex_method_index, pointer_size),
              resolved_method) << PrettyMethod(resolved_method);
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 89668f2..6d3a960 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1175,7 +1175,7 @@
     {
       ScopedObjectAccess soa(Thread::Current());
       mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-          dex_file, false);
+          soa.Self(), dex_file, false);
       mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
       if (resolved_class == nullptr) {
         // Erroneous class.
@@ -1201,7 +1201,8 @@
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file, false)));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
+        soa.Self(), dex_file, false)));
     class_linker->ResolveString(dex_file, string_idx, dex_cache);
     result = true;
   }
@@ -1227,7 +1228,8 @@
     *equals_referrers_class = false;
   }
   ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file, false);
+  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
+      soa.Self(), dex_file, false);
   // Get type from dex cache assuming it was populated by the verifier
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
@@ -1265,7 +1267,7 @@
                                                             uint32_t type_idx) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-      dex_file, false);
+      soa.Self(), dex_file, false);
   // Get type from dex cache assuming it was populated by the verifier.
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
@@ -1294,7 +1296,8 @@
                                         uintptr_t* direct_type_ptr, bool* out_is_finalizable) {
   ScopedObjectAccess soa(Thread::Current());
   Runtime* runtime = Runtime::Current();
-  mirror::DexCache* dex_cache = runtime->GetClassLinker()->FindDexCache(dex_file, false);
+  mirror::DexCache* dex_cache = runtime->GetClassLinker()->FindDexCache(
+      soa.Self(), dex_file, false);
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
     return false;
@@ -1423,7 +1426,8 @@
   {
     StackHandleScope<2> hs(soa.Self());
     Handle<mirror::DexCache> dex_cache_handle(
-        hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile(), false)));
+        hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(
+            soa.Self(), *mUnit->GetDexFile(), false)));
     Handle<mirror::ClassLoader> class_loader_handle(
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader())));
     resolved_field =
@@ -1473,7 +1477,8 @@
   {
     StackHandleScope<2> hs(soa.Self());
     Handle<mirror::DexCache> dex_cache_handle(
-        hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile(), false)));
+        hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(
+            soa.Self(), *mUnit->GetDexFile(), false)));
     Handle<mirror::ClassLoader> class_loader_handle(
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader())));
     resolved_field =
@@ -1659,7 +1664,8 @@
   // Try to resolve the method and compiling method's class.
   StackHandleScope<3> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(
-      hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile(), false)));
+      hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(
+          soa.Self(), *mUnit->GetDexFile(), false)));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader())));
   uint32_t method_idx = target_method->dex_method_index;
@@ -1911,7 +1917,8 @@
     StackHandleScope<2> hs(soa.Self());
     Handle<mirror::ClassLoader> class_loader(
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file, false)));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
+        soa.Self(), dex_file, false)));
     // Resolve the class.
     mirror::Class* klass = class_linker->ResolveType(dex_file, class_def.class_idx_, dex_cache,
                                                      class_loader);
@@ -2090,7 +2097,8 @@
        * This is to ensure the class is structurally sound for compilation. An unsound class
       * will be rejected by the verifier and later skipped by the compiler.
        */
-      Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file, false)));
+      Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
+          soa.Self(), dex_file, false)));
       std::string error_msg;
       if (verifier::MethodVerifier::VerifyClass(soa.Self(), &dex_file, dex_cache, class_loader,
                                                 &class_def, true, &error_msg) ==
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index e35d07d..1107599 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -108,7 +108,7 @@
   ScopedObjectAccess soa(Thread::Current());
   ASSERT_TRUE(java_lang_dex_file_ != nullptr);
   const DexFile& dex = *java_lang_dex_file_;
-  mirror::DexCache* dex_cache = class_linker_->FindDexCache(dex);
+  mirror::DexCache* dex_cache = class_linker_->FindDexCache(soa.Self(), dex);
   EXPECT_EQ(dex.NumStringIds(), dex_cache->NumStrings());
   for (size_t i = 0; i < dex_cache->NumStrings(); i++) {
     const mirror::String* string = dex_cache->GetResolvedString(i);
@@ -210,8 +210,8 @@
   CompileAll(class_loader);
 
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  StackHandleScope<1> hs(self);
   ScopedObjectAccess soa(self);
+  StackHandleScope<1> hs(self);
   Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
       reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
   mirror::Class* klass = class_linker->FindClass(self, "LStaticLeafMethods;", h_loader);
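
The reordering above is deliberate: a StackHandleScope should be opened only after the ScopedObjectAccess, because the handles it hands out are GC roots that must be created while the thread is runnable and holds the mutator lock. A hedged sketch of the rule (function name is illustrative):

    // Mutator lock first, handle scope second.
    void DecodeLoader(Thread* self, jobject class_loader) {
      ScopedObjectAccess soa(self);  // native -> runnable, mutator lock held
      StackHandleScope<1> hs(self);  // now safe to create handles
      Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
          soa.Decode<mirror::ClassLoader*>(class_loader)));
      // ... use h_loader while soa is in scope ...
    }
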
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 0e0b224..fdf904d 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -617,7 +617,8 @@
     // Unchecked as we hold mutator_lock_ on entry.
     ScopedObjectAccessUnchecked soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(*dex_file_)));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(
+        Thread::Current(), *dex_file_)));
     ArtMethod* method = linker->ResolveMethod(
         *dex_file_, it.GetMemberIndex(), dex_cache, NullHandle<mirror::ClassLoader>(), nullptr,
         invoke_type);
@@ -668,7 +669,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) {
     OatDexMethodVisitor::StartClass(dex_file, class_def_index);
     if (dex_cache_ == nullptr || dex_cache_->GetDexFile() != dex_file) {
-      dex_cache_ = class_linker_->FindDexCache(*dex_file);
+      dex_cache_ = class_linker_->FindDexCache(Thread::Current(), *dex_file);
     }
     return true;
   }
@@ -798,7 +799,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_) {
     MethodReference ref = patch.TargetMethod();
     mirror::DexCache* dex_cache =
-        (dex_file_ == ref.dex_file) ? dex_cache_ : class_linker_->FindDexCache(*ref.dex_file);
+        (dex_file_ == ref.dex_file) ? dex_cache_ : class_linker_->FindDexCache(
+            Thread::Current(), *ref.dex_file);
     ArtMethod* method = dex_cache->GetResolvedMethod(
         ref.dex_method_index, class_linker_->GetImagePointerSize());
     CHECK(method != nullptr);
@@ -832,7 +834,7 @@
   mirror::Class* GetTargetType(const LinkerPatch& patch)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     mirror::DexCache* dex_cache = (dex_file_ == patch.TargetTypeDexFile())
-        ? dex_cache_ : class_linker_->FindDexCache(*patch.TargetTypeDexFile());
+        ? dex_cache_ : class_linker_->FindDexCache(Thread::Current(), *patch.TargetTypeDexFile());
     mirror::Class* type = dex_cache->GetResolvedType(patch.TargetTypeIndex());
     CHECK(type != nullptr);
     return type;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 7b42db8..23ab94e 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -902,7 +902,7 @@
   StackHandleScope<4> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
       dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          *dex_compilation_unit_->GetDexFile())));
+          soa.Self(), *dex_compilation_unit_->GetDexFile())));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
   ArtMethod* resolved_method = compiler_driver_->ResolveMethod(
@@ -912,7 +912,7 @@
 
   const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
   Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(outer_dex_file)));
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
 
   // The index at which the method's class is stored in the DexCache's type array.
@@ -1228,7 +1228,7 @@
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader())));
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      compilation_unit.GetClassLinker()->FindDexCache(dex_file)));
+      compilation_unit.GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
 
   return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
 }
@@ -1245,7 +1245,8 @@
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<4> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(*dex_compilation_unit_->GetDexFile())));
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          soa.Self(), *dex_compilation_unit_->GetDexFile())));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
   Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass(
@@ -1264,7 +1265,8 @@
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<4> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(*dex_compilation_unit_->GetDexFile())));
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          soa.Self(), *dex_compilation_unit_->GetDexFile())));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
   ArtField* resolved_field = compiler_driver_->ResolveField(
@@ -1277,7 +1279,7 @@
 
   const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
   Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(outer_dex_file)));
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
 
   // The index at which the field's class is stored in the DexCache's type array.
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 503187b..7c60026 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1071,12 +1071,6 @@
       << instruction->DebugName() << ((slow_path != nullptr) ? slow_path->GetDescription() : "");
 }
 
-void SlowPathCode::RecordPcInfo(CodeGenerator* codegen,
-                                HInstruction* instruction,
-                                uint32_t dex_pc) {
-  codegen->RecordPcInfo(instruction, dex_pc, this);
-}
-
 void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
   RegisterSet* register_set = locations->GetLiveRegisters();
   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 938369b..cdd4675 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -81,7 +81,6 @@
 
   virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
   virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
-  void RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc);
 
   bool IsCoreRegisterSaved(int reg) const {
     return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
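
SlowPathCode::RecordPcInfo was a pure forwarder, so both its definition (removed above in code_generator.cc) and this declaration go away. The intrinsics slow paths later in the patch make the forwarded call themselves:

    // The removed helper simply did:
    //   codegen->RecordPcInfo(instruction, dex_pc, this);
    // Call sites inside EmitNativeCode now write exactly that, passing the
    // slow path as the third argument so the PC is recorded against it:
    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
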
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 62026f3..0640179 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -142,24 +142,22 @@
 
 class BoundsCheckSlowPathARM : public SlowPathCodeARM {
  public:
-  BoundsCheckSlowPathARM(HBoundsCheck* instruction,
-                         Location index_location,
-                         Location length_location)
-      : instruction_(instruction),
-        index_location_(index_location),
-        length_location_(length_location) {}
+  explicit BoundsCheckSlowPathARM(HBoundsCheck* instruction)
+      : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+
     __ Bind(GetEntryLabel());
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
-        index_location_,
+        locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        length_location_,
+        locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
     arm_codegen->InvokeRuntime(
@@ -172,8 +170,6 @@
 
  private:
   HBoundsCheck* const instruction_;
-  const Location index_location_;
-  const Location length_location_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM);
 };
@@ -263,17 +259,12 @@
 
 class TypeCheckSlowPathARM : public SlowPathCodeARM {
  public:
-  TypeCheckSlowPathARM(HInstruction* instruction,
-                       Location class_to_check,
-                       Location object_class,
-                       uint32_t dex_pc)
-      : instruction_(instruction),
-        class_to_check_(class_to_check),
-        object_class_(object_class),
-        dex_pc_(dex_pc) {}
+  explicit TypeCheckSlowPathARM(HInstruction* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
+    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
+                                                        : locations->Out();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
@@ -285,20 +276,25 @@
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
-        class_to_check_,
+        locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimNot,
-        object_class_,
+        object_class,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimNot);
 
     if (instruction_->IsInstanceOf()) {
-      arm_codegen->InvokeRuntime(
-          QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_, this);
+      arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
       arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
     } else {
       DCHECK(instruction_->IsCheckCast());
-      arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_, this);
+      arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
     }
 
     RestoreLiveRegisters(codegen, locations);
@@ -309,9 +305,6 @@
 
  private:
   HInstruction* const instruction_;
-  const Location class_to_check_;
-  const Location object_class_;
-  uint32_t dex_pc_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM);
 };
@@ -2731,11 +2724,9 @@
         Register temp = locations->GetTemp(0).AsRegister<Register>();
 
         // temp = reg1 / reg2  (integer division)
-        // temp = temp * reg2
-        // dest = reg1 - temp
+        // dest = reg1 - temp * reg2
         __ sdiv(temp, reg1, reg2);
-        __ mul(temp, temp, reg2);
-        __ sub(out.AsRegister<Register>(), reg1, ShifterOperand(temp));
+        __ mls(out.AsRegister<Register>(), temp, reg2, reg1);
       } else {
         InvokeRuntimeCallingConvention calling_convention;
         DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegister<Register>());
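
mls (multiply-and-subtract) folds the old mul+sub pair into a single instruction computing dest = reg1 - temp * reg2. The identity behind the sequence, sketched in C++ (integer division in C++ truncates toward zero, matching ARM sdiv):

    #include <cstdint>

    // a % b == a - (a / b) * b for truncating division.
    int32_t Rem(int32_t a, int32_t b) {
      int32_t q = a / b;  // sdiv temp, reg1, reg2
      return a - q * b;   // mls  dest, temp, reg2, reg1
    }
    // e.g. Rem(-7, 3) == -7 - (-2 * 3) == -1: the result keeps the sign of
    // the dividend, as Java's % requires.
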
@@ -2905,7 +2896,7 @@
         // If the shift is > 32 bits, override the high part
         __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord));
         __ it(PL);
-        __ Lsl(o_h, low, temp, false, PL);
+        __ Lsl(o_h, low, temp, PL);
         // Shift the low part
         __ Lsl(o_l, low, o_l);
       } else if (op->IsShr()) {
@@ -2919,7 +2910,7 @@
         // If the shift is > 32 bits, override the low part
         __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
         __ it(PL);
-        __ Asr(o_l, high, temp, false, PL);
+        __ Asr(o_l, high, temp, PL);
         // Shift the high part
         __ Asr(o_h, high, o_h);
       } else {
@@ -2931,7 +2922,7 @@
         __ orr(o_l, o_l, ShifterOperand(temp));
         __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
         __ it(PL);
-        __ Lsr(o_l, high, temp, false, PL);
+        __ Lsr(o_l, high, temp, PL);
         __ Lsr(o_h, high, o_h);
       }
       break;
@@ -3901,8 +3892,8 @@
 
 void InstructionCodeGeneratorARM::VisitBoundsCheck(HBoundsCheck* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(
-      instruction, locations->InAt(0), locations->InAt(1));
+  SlowPathCodeARM* slow_path =
+      new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
   codegen_->AddSlowPath(slow_path);
 
   Register index = locations->InAt(0).AsRegister<Register>();
@@ -4346,6 +4337,7 @@
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   // The out register is used as a temporary, so it overlaps with the inputs.
+  // Note that TypeCheckSlowPathARM uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
@@ -4375,8 +4367,7 @@
   } else {
     // If the classes are not equal, we go into a slow path.
     DCHECK(locations->OnlyCallsOnSlowPath());
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
-        instruction, locations->InAt(1), locations->Out(), instruction->GetDexPc());
+    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction);
     codegen_->AddSlowPath(slow_path);
     __ b(slow_path->GetEntryLabel(), NE);
     __ LoadImmediate(out, 1);
@@ -4399,6 +4390,7 @@
       instruction, LocationSummary::kCallOnSlowPath);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathARM uses this register too.
   locations->AddTemp(Location::RequiresRegister());
 }
 
@@ -4409,8 +4401,8 @@
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
-  SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
-      instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
+  SlowPathCodeARM* slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction);
   codegen_->AddSlowPath(slow_path);
 
    // Avoid null check if we know obj is not null.
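
The same slimming is applied to the ARM64, MIPS64, x86 and x86-64 slow paths below: instead of caching Location and dex-pc fields at construction time, each slow path keeps only the instruction and derives everything lazily in EmitNativeCode. This is safe because the LocationSummary is owned by the instruction and outlives code generation. The generic shape, sketched with a hypothetical class (base-class details elided):

    class ExampleSlowPath : public SlowPathCode {  // hypothetical name
     public:
      explicit ExampleSlowPath(HBoundsCheck* instruction)
          : instruction_(instruction) {}

      void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
        LocationSummary* locations = instruction_->GetLocations();
        Location index = locations->InAt(0);         // was: index_location_
        Location length = locations->InAt(1);        // was: length_location_
        uint32_t dex_pc = instruction_->GetDexPc();  // was: dex_pc_
        // ... emit the parallel moves and runtime call as before ...
      }

     private:
      HBoundsCheck* const instruction_;  // the only state kept
      DISALLOW_COPY_AND_ASSIGN(ExampleSlowPath);
    };
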
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 25b3ea2..8035461 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -191,23 +191,19 @@
 
 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  BoundsCheckSlowPathARM64(HBoundsCheck* instruction,
-                           Location index_location,
-                           Location length_location)
-      : instruction_(instruction),
-        index_location_(index_location),
-        length_location_(length_location) {}
-
+  explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+
     __ Bind(GetEntryLabel());
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
-        index_location_, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
-        length_location_, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
+        locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
+        locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
     arm64_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
@@ -219,8 +215,6 @@
 
  private:
   HBoundsCheck* const instruction_;
-  const Location index_location_;
-  const Location length_location_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
 };
@@ -403,20 +397,17 @@
 
 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  TypeCheckSlowPathARM64(HInstruction* instruction,
-                         Location class_to_check,
-                         Location object_class,
-                         uint32_t dex_pc)
-      : instruction_(instruction),
-        class_to_check_(class_to_check),
-        object_class_(object_class),
-        dex_pc_(dex_pc) {}
+  explicit TypeCheckSlowPathARM64(HInstruction* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
+    Location class_to_check = locations->InAt(1);
+    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
+                                                        : locations->Out();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    uint32_t dex_pc = instruction_->GetDexPc();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -425,12 +416,12 @@
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
-        class_to_check_, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot,
-        object_class_, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot);
+        class_to_check, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot,
+        object_class, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot);
 
     if (instruction_->IsInstanceOf()) {
       arm64_codegen->InvokeRuntime(
-          QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_, this);
+          QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this);
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
@@ -438,7 +429,7 @@
                            const mirror::Class*, const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
-      arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_, this);
+      arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
       CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
     }
 
@@ -450,9 +441,6 @@
 
  private:
   HInstruction* const instruction_;
-  const Location class_to_check_;
-  const Location object_class_;
-  uint32_t dex_pc_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
 };
@@ -1602,9 +1590,8 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  BoundsCheckSlowPathARM64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(
-      instruction, locations->InAt(0), locations->InAt(1));
+  BoundsCheckSlowPathARM64* slow_path =
+      new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction);
   codegen_->AddSlowPath(slow_path);
 
   __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
@@ -1616,17 +1603,17 @@
       instruction, LocationSummary::kCallOnSlowPath);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathARM64 uses this register too.
   locations->AddTemp(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
  Register obj = InputRegisterAt(instruction, 0);
  Register cls = InputRegisterAt(instruction, 1);
   Register obj_cls = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
 
-  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
-      instruction, locations->InAt(1), LocationFrom(obj_cls), instruction->GetDexPc());
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction);
   codegen_->AddSlowPath(slow_path);
 
   // Avoid null check if we know obj is not null.
@@ -2240,6 +2227,7 @@
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
  // The output overlaps with the inputs.
+  // Note that TypeCheckSlowPathARM64 uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
@@ -2269,8 +2257,7 @@
     // If the classes are not equal, we go into a slow path.
     DCHECK(locations->OnlyCallsOnSlowPath());
     SlowPathCodeARM64* slow_path =
-        new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
-        instruction, locations->InAt(1), locations->Out(), instruction->GetDexPc());
+        new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction);
     codegen_->AddSlowPath(slow_path);
     __ B(ne, slow_path->GetEntryLabel());
     __ Mov(out, 1);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 093d786..e4188e4 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -112,23 +112,19 @@
 
 class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction,
-                            Location index_location,
-                            Location length_location)
-      : instruction_(instruction),
-        index_location_(index_location),
-        length_location_(length_location) {}
+  explicit BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     __ Bind(GetEntryLabel());
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(index_location_,
+    codegen->EmitParallelMoves(locations->InAt(0),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                Primitive::kPrimInt,
-                               length_location_,
+                               locations->InAt(1),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimInt);
     mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
@@ -144,8 +140,6 @@
 
  private:
   HBoundsCheck* const instruction_;
-  const Location index_location_;
-  const Location length_location_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS64);
 };
@@ -334,17 +328,13 @@
 
 class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  TypeCheckSlowPathMIPS64(HInstruction* instruction,
-                          Location class_to_check,
-                          Location object_class,
-                          uint32_t dex_pc)
-      : instruction_(instruction),
-        class_to_check_(class_to_check),
-        object_class_(object_class),
-        dex_pc_(dex_pc) {}
+  explicit TypeCheckSlowPathMIPS64(HInstruction* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
+    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
+                                                        : locations->Out();
+    uint32_t dex_pc = instruction_->GetDexPc();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
@@ -355,17 +345,17 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(class_to_check_,
+    codegen->EmitParallelMoves(locations->InAt(1),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                Primitive::kPrimNot,
-                               object_class_,
+                               object_class,
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimNot);
 
     if (instruction_->IsInstanceOf()) {
       mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
                                     instruction_,
-                                    dex_pc_,
+                                    dex_pc,
                                     this);
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
@@ -376,7 +366,7 @@
                            const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
-      mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_, this);
+      mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
       CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
     }
 
@@ -388,9 +378,6 @@
 
  private:
   HInstruction* const instruction_;
-  const Location class_to_check_;
-  const Location object_class_;
-  uint32_t dex_pc_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS64);
 };
@@ -1590,10 +1577,8 @@
 
 void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  BoundsCheckSlowPathMIPS64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathMIPS64(
-      instruction,
-      locations->InAt(0),
-      locations->InAt(1));
+  BoundsCheckSlowPathMIPS64* slow_path =
+      new (GetGraph()->GetArena()) BoundsCheckSlowPathMIPS64(instruction);
   codegen_->AddSlowPath(slow_path);
 
   GpuRegister index = locations->InAt(0).AsRegister<GpuRegister>();
@@ -1616,6 +1601,7 @@
       LocationSummary::kCallOnSlowPath);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathMIPS64 uses this register too.
   locations->AddTemp(Location::RequiresRegister());
 }
 
@@ -1625,11 +1611,8 @@
   GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
   GpuRegister obj_cls = locations->GetTemp(0).AsRegister<GpuRegister>();
 
-  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(
-      instruction,
-      locations->InAt(1),
-      Location::RegisterLocation(obj_cls),
-      instruction->GetDexPc());
+  SlowPathCodeMIPS64* slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction);
   codegen_->AddSlowPath(slow_path);
 
   // TODO: avoid this check if we know obj is not null.
@@ -2270,6 +2253,7 @@
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
  // The output overlaps with the inputs.
+  // Note that TypeCheckSlowPathMIPS64 uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
@@ -2296,10 +2280,7 @@
     // If the classes are not equal, we go into a slow path.
     DCHECK(locations->OnlyCallsOnSlowPath());
     SlowPathCodeMIPS64* slow_path =
-        new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
-                                                             locations->InAt(1),
-                                                             locations->Out(),
-                                                             instruction->GetDexPc());
+        new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction);
     codegen_->AddSlowPath(slow_path);
     __ Bnec(out, cls, slow_path->GetEntryLabel());
     __ LoadConst32(out, 1);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 72c690d..e8aa61d 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -116,24 +116,20 @@
 
 class BoundsCheckSlowPathX86 : public SlowPathCodeX86 {
  public:
-  BoundsCheckSlowPathX86(HBoundsCheck* instruction,
-                         Location index_location,
-                         Location length_location)
-      : instruction_(instruction),
-        index_location_(index_location),
-        length_location_(length_location) {}
+  explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     x86_codegen->EmitParallelMoves(
-        index_location_,
+        locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        length_location_,
+        locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
     x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
@@ -148,8 +144,6 @@
 
  private:
   HBoundsCheck* const instruction_;
-  const Location index_location_;
-  const Location length_location_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
 };
@@ -280,15 +274,12 @@
 
 class TypeCheckSlowPathX86 : public SlowPathCodeX86 {
  public:
-  TypeCheckSlowPathX86(HInstruction* instruction,
-                       Location class_to_check,
-                       Location object_class)
-      : instruction_(instruction),
-        class_to_check_(class_to_check),
-        object_class_(object_class) {}
+  explicit TypeCheckSlowPathX86(HInstruction* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
+    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
+                                                        : locations->Out();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
@@ -300,10 +291,10 @@
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     x86_codegen->EmitParallelMoves(
-        class_to_check_,
+        locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimNot,
-        object_class_,
+        object_class,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimNot);
 
@@ -332,8 +323,6 @@
 
  private:
   HInstruction* const instruction_;
-  const Location class_to_check_;
-  const Location object_class_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
 };
@@ -4357,7 +4346,7 @@
   Location index_loc = locations->InAt(0);
   Location length_loc = locations->InAt(1);
   SlowPathCodeX86* slow_path =
-    new (GetGraph()->GetArena()) BoundsCheckSlowPathX86(instruction, index_loc, length_loc);
+    new (GetGraph()->GetArena()) BoundsCheckSlowPathX86(instruction);
 
   if (length_loc.IsConstant()) {
     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
@@ -4830,6 +4819,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86 uses this register too.
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -4866,8 +4856,7 @@
   } else {
     // If the classes are not equal, we go into a slow path.
     DCHECK(locations->OnlyCallsOnSlowPath());
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
-        instruction, locations->InAt(1), locations->Out());
+    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction);
     codegen_->AddSlowPath(slow_path);
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ movl(out, Immediate(1));
@@ -4890,6 +4879,7 @@
       instruction, LocationSummary::kCallOnSlowPath);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86 uses this register too.
   locations->AddTemp(Location::RequiresRegister());
 }
 
@@ -4899,8 +4889,8 @@
   Location cls = locations->InAt(1);
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
-      instruction, locations->InAt(1), locations->GetTemp(0));
+  SlowPathCodeX86* slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction);
   codegen_->AddSlowPath(slow_path);
 
   // Avoid null check if we know obj is not null.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 820ec78..ff52f4f 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -170,24 +170,21 @@
 
 class BoundsCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
  public:
-  BoundsCheckSlowPathX86_64(HBoundsCheck* instruction,
-                            Location index_location,
-                            Location length_location)
-      : instruction_(instruction),
-        index_location_(index_location),
-        length_location_(length_location) {}
+  explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
+    : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
-        index_location_,
+        locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        length_location_,
+        locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
     x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
@@ -200,8 +197,6 @@
 
  private:
   HBoundsCheck* const instruction_;
-  const Location index_location_;
-  const Location length_location_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
 };
@@ -293,17 +288,14 @@
 
 class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
  public:
-  TypeCheckSlowPathX86_64(HInstruction* instruction,
-                          Location class_to_check,
-                          Location object_class,
-                          uint32_t dex_pc)
-      : instruction_(instruction),
-        class_to_check_(class_to_check),
-        object_class_(object_class),
-        dex_pc_(dex_pc) {}
+  explicit TypeCheckSlowPathX86_64(HInstruction* instruction)
+      : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
+    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
+                                                        : locations->Out();
+    uint32_t dex_pc = instruction_->GetDexPc();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
@@ -315,23 +307,23 @@
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
-        class_to_check_,
+        locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimNot,
-        object_class_,
+        object_class,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimNot);
 
     if (instruction_->IsInstanceOf()) {
       x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
                                  instruction_,
-                                 dex_pc_,
+                                 dex_pc,
                                  this);
     } else {
       DCHECK(instruction_->IsCheckCast());
       x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
                                  instruction_,
-                                 dex_pc_,
+                                 dex_pc,
                                  this);
     }
 
@@ -347,9 +339,6 @@
 
  private:
   HInstruction* const instruction_;
-  const Location class_to_check_;
-  const Location object_class_;
-  const uint32_t dex_pc_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
 };
@@ -4195,7 +4184,7 @@
   Location index_loc = locations->InAt(0);
   Location length_loc = locations->InAt(1);
   SlowPathCodeX86_64* slow_path =
-    new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction, index_loc, length_loc);
+    new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
 
   if (length_loc.IsConstant()) {
     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
@@ -4653,6 +4642,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86_64 uses this register too.
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -4688,8 +4678,7 @@
   } else {
     // If the classes are not equal, we go into a slow path.
     DCHECK(locations->OnlyCallsOnSlowPath());
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(
-        instruction, locations->InAt(1), locations->Out(), instruction->GetDexPc());
+    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction);
     codegen_->AddSlowPath(slow_path);
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ movl(out, Immediate(1));
@@ -4712,6 +4701,7 @@
       instruction, LocationSummary::kCallOnSlowPath);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86_64 uses this register too.
   locations->AddTemp(Location::RequiresRegister());
 }
 
@@ -4721,8 +4711,8 @@
   Location cls = locations->InAt(1);
   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(
-      instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
+  SlowPathCodeX86_64* slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction);
   codegen_->AddSlowPath(slow_path);
 
   // Avoid null check if we know obj is not null.
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index ff90f32..112d42e 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -182,10 +182,10 @@
   ArtMethod* resolved_method;
   if (invoke_instruction->IsInvokeStaticOrDirect()) {
     MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
-    resolved_method = class_linker->FindDexCache(*ref.dex_file)->GetResolvedMethod(
+    resolved_method = class_linker->FindDexCache(soa.Self(), *ref.dex_file)->GetResolvedMethod(
         ref.dex_method_index, class_linker->GetImagePointerSize());
   } else {
-    resolved_method = class_linker->FindDexCache(caller_dex_file)->GetResolvedMethod(
+    resolved_method = class_linker->FindDexCache(soa.Self(), caller_dex_file)->GetResolvedMethod(
         method_index, class_linker->GetImagePointerSize());
   }
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index df6e550..0ac26de 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -132,6 +132,12 @@
   // with
   //    ADD tmp, a, b
   //    NEG dst, tmp
+  // Note that we cannot optimize `(-a) + (-b)` to `-(a + b)` for floating-point.
+  // When `a` is `-0.0` and `b` is `0.0`, the former expression yields `0.0`,
+  // while the latter yields `-0.0`.
+  if (!Primitive::IsIntegralType(binop->GetType())) {
+    return false;
+  }
   binop->ReplaceInput(left_neg->GetInput(), 0);
   binop->ReplaceInput(right_neg->GetInput(), 1);
   left_neg->GetBlock()->RemoveInstruction(left_neg);
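
The new guard matters only for floating point, and the comment's example can be verified directly: with a = -0.0 and b = 0.0, (-a) + (-b) is 0.0 + (-0.0) == +0.0, while -(a + b) is -(+0.0) == -0.0. A self-contained check (illustrative, not part of the patch):

    #include <cmath>
    #include <cstdio>

    int main() {
      double a = -0.0, b = 0.0;
      double original  = (-a) + (-b);  // what the bytecode computes: +0.0
      double rewritten = -(a + b);     // what the rejected rewrite yields: -0.0
      // The two compare equal; signbit tells them apart.
      std::printf("%d %d\n", std::signbit(original), std::signbit(rewritten));
      return 0;  // prints "0 1"
    }
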
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 806fd7a..69a3e62 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -103,7 +103,7 @@
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
                                           Location::RegisterLocation(kArtMethodRegister));
-      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
+      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
     } else {
       UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
       UNREACHABLE();
@@ -989,10 +989,7 @@
   DCHECK_ALIGNED(value_offset, 4);
   static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
 
-  // temp cannot overflow because we cannot allocate a String object with size 4GiB or greater.
-  __ add(temp, temp, ShifterOperand(temp));
   __ LoadImmediate(temp1, value_offset);
-  __ add(temp, temp, ShifterOperand(value_offset));
 
   // Loop to compare strings 2 characters at a time starting at the front of the string.
   // Ok to do this because strings with an odd length are zero-padded.
@@ -1002,8 +999,8 @@
   __ cmp(out, ShifterOperand(temp2));
   __ b(&return_false, NE);
   __ add(temp1, temp1, ShifterOperand(sizeof(uint32_t)));
-  __ cmp(temp1, ShifterOperand(temp));
-  __ b(&loop, LO);
+  __ subs(temp, temp, ShifterOperand(sizeof(uint32_t) / sizeof(uint16_t)));
+  __ b(&loop, GT);
 
   // Return true and exit the function.
   // If loop does not result in returning false, we return true.
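
The rewritten loop drops the end-offset computation (the two removed adds) and instead counts remaining characters down in temp, subtracting two (sizeof(uint32_t) / sizeof(uint16_t)) per four-byte compare and looping while the count stays positive. A C-level sketch of the new loop (hedged: assumes the intrinsic has already established equal, non-zero lengths, and relies on odd-length strings being zero-padded as asserted above):

    #include <cstdint>
    #include <cstring>

    bool LoopBody(const uint16_t* lhs, const uint16_t* rhs, int32_t char_count) {
      int32_t remaining = char_count;  // temp
      size_t offset = 0;               // temp1 (starts at value_offset)
      do {
        uint32_t l, r;
        std::memcpy(&l, reinterpret_cast<const char*>(lhs) + offset, sizeof(l));
        std::memcpy(&r, reinterpret_cast<const char*>(rhs) + offset, sizeof(r));
        if (l != r) return false;                          // b &return_false, NE
        offset += sizeof(uint32_t);                        // add temp1, #4
        remaining -= sizeof(uint32_t) / sizeof(uint16_t);  // subs temp, #2
      } while (remaining > 0);                             // b &loop, GT
      return true;
    }
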
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index a5332ea..0171d69 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -112,7 +112,7 @@
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
                                           LocationFrom(kArtMethodRegister));
-      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
+      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
     } else {
       UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
       UNREACHABLE();
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index b7126b2..be076cd 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -141,7 +141,7 @@
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
                                           Location::RegisterLocation(EAX));
-      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
+      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
     } else {
       UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
       UNREACHABLE();
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 15fbac1..1f35b59 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -132,7 +132,7 @@
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(
           invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
-      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
+      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
     } else {
       UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
       UNREACHABLE();
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 2a76991..6f251e8 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -540,11 +540,14 @@
                                                      CompilerDriver* compiler_driver,
                                                      const DexCompilationUnit& dex_compilation_unit,
                                                      PassObserver* pass_observer) const {
-  StackHandleScopeCollection handles(Thread::Current());
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScopeCollection handles(soa.Self());
+  soa.Self()->TransitionFromRunnableToSuspended(kNative);
   RunOptimizations(graph, compiler_driver, compilation_stats_.get(),
                    dex_compilation_unit, pass_observer, &handles);
 
   if (graph->HasTryCatch()) {
+    soa.Self()->TransitionFromSuspendedToRunnable();
     return nullptr;
   }
 
@@ -582,6 +585,8 @@
       ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
       ArrayRef<const LinkerPatch>(linker_patches));
   pass_observer->DumpDisassembly();
+
+  soa.Self()->TransitionFromSuspendedToRunnable();
   return compiled_method;
 }
 
@@ -709,7 +714,8 @@
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<4> hs(soa.Self());
     ClassLinker* class_linker = dex_compilation_unit.GetClassLinker();
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file)));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
+        soa.Self(), dex_file)));
     Handle<mirror::ClassLoader> loader(hs.NewHandle(
         soa.Decode<mirror::ClassLoader*>(class_loader)));
     ArtMethod* art_method = compiler_driver->ResolveMethod(
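
[Editor's note] The compile path now enters with a ScopedObjectAccess so handle creation is legal, then explicitly suspends the thread for the long-running optimization passes and must resume it on every exit path, including the try/catch bail-out. A hedged sketch of that protocol with mock types (kNative and the two transition calls are the real ART names shown above; everything else is illustrative):

enum ThreadState { kNative, kRunnable };

struct MockThread {
  ThreadState state = kRunnable;
  void TransitionFromRunnableToSuspended(ThreadState s) { state = s; }
  void TransitionFromSuspendedToRunnable() { state = kRunnable; }
};

int CompileSketch(MockThread* self, bool has_try_catch) {
  // Handles are created here while Runnable (mutator lock held)...
  self->TransitionFromRunnableToSuspended(kNative);  // Long passes must not block GC.
  if (has_try_catch) {
    self->TransitionFromSuspendedToRunnable();  // Resume on *every* return path.
    return -1;
  }
  // ... run optimizations, generate code ...
  self->TransitionFromSuspendedToRunnable();
  return 0;
}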
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 824f28e..516638b 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -79,6 +79,8 @@
     : HOptimization(graph, name),
       handles_(handles),
       worklist_(graph->GetArena(), kDefaultWorklistSize) {
+  // Mutator lock is required for NewHandle, but annotalysis ignores constructors.
+  ScopedObjectAccess soa(Thread::Current());
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
   object_class_handle_ = handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject));
   string_class_handle_ = handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangString));
@@ -87,7 +89,6 @@
       handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangThrowable));
 
   if (kIsDebugBuild) {
-    ScopedObjectAccess soa(Thread::Current());
     DCHECK(ReferenceTypeInfo::IsValidHandle(object_class_handle_));
     DCHECK(ReferenceTypeInfo::IsValidHandle(class_class_handle_));
     DCHECK(ReferenceTypeInfo::IsValidHandle(string_class_handle_));
@@ -362,7 +363,8 @@
     if (kIsDebugBuild) {
       ScopedObjectAccess soa(Thread::Current());
       ClassLinker* cl = Runtime::Current()->GetClassLinker();
-      mirror::DexCache* dex_cache = cl->FindDexCache(instr->AsInvoke()->GetDexFile(), false);
+      mirror::DexCache* dex_cache = cl->FindDexCache(
+          soa.Self(), instr->AsInvoke()->GetDexFile(), false);
       ArtMethod* method = dex_cache->GetResolvedMethod(
           instr->AsInvoke()->GetDexMethodIndex(), cl->GetImagePointerSize());
       DCHECK(method != nullptr);
@@ -393,7 +395,8 @@
   DCHECK_EQ(instr->GetType(), Primitive::kPrimNot);
 
   ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
+  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
+      soa.Self(), dex_file, false);
   // Get type from dex cache assuming it was populated by the verifier.
   SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact);
 }
@@ -431,7 +434,7 @@
 
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  mirror::DexCache* dex_cache = cl->FindDexCache(info.GetDexFile());
+  mirror::DexCache* dex_cache = cl->FindDexCache(soa.Self(), info.GetDexFile(), false);
   ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), dex_cache);
   // TODO: There are certain cases where we can't resolve the field.
   // b/21914925 is open to keep track of a repro case for this issue.
@@ -450,7 +453,7 @@
 void RTPVisitor::VisitLoadClass(HLoadClass* instr) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::DexCache* dex_cache =
-      Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile());
+      Runtime::Current()->GetClassLinker()->FindDexCache(soa.Self(), instr->GetDexFile(), false);
   // Get type from dex cache assuming it was populated by the verifier.
   mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex());
   // TODO: investigating why we are still getting unresolved classes: b/22821472.
@@ -633,7 +636,7 @@
 
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  mirror::DexCache* dex_cache = cl->FindDexCache(instr->GetDexFile());
+  mirror::DexCache* dex_cache = cl->FindDexCache(soa.Self(), instr->GetDexFile());
   ArtMethod* method = dex_cache->GetResolvedMethod(
       instr->GetDexMethodIndex(), cl->GetImagePointerSize());
   mirror::Class* klass = (method == nullptr) ? nullptr : method->GetReturnType(false);
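
[Editor's note] Every hunk in this file makes the same mechanical change: FindDexCache now takes the calling thread explicitly. A declaration-only sketch of the assumed new shape (mock declarations, not ART headers; the allow_failure parameter name is an assumption inferred from the pre-existing `, false)` call sites):

namespace mirror { class DexCache; }
class Thread;
class DexFile;

class ClassLinker {
 public:
  mirror::DexCache* FindDexCache(Thread* self,
                                 const DexFile& dex_file,
                                 bool allow_failure = false);
};

// Call sites then read: linker->FindDexCache(soa.Self(), dex_file, false);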
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 0e3e08c..807beda 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -137,10 +137,14 @@
         if (rs_ == kNoRegister) {
           // Immediate shift.
           if (shift_ == RRX) {
+            DCHECK_EQ(immed_, 0u);
             // RRX is encoded as an ROR with imm 0.
             return ROR << 4 | static_cast<uint32_t>(rm_);
           } else {
-            uint32_t imm3 = immed_ >> 2;
+            DCHECK((1 <= immed_ && immed_ <= 31) ||
+                   (immed_ == 0u && shift_ == LSL) ||
+                   (immed_ == 32u && (shift_ == ASR || shift_ == LSR)));
+            uint32_t imm3 = (immed_ >> 2) & 7 /* 0b111 */;
             uint32_t imm2 = immed_ & 3U /* 0b11 */;
 
             return imm3 << 12 | imm2 << 6 | shift_ << 4 |
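
[Editor's note] The new mask matters for the boundary immediate the DCHECK admits: a Thumb2 shift immediate imm5 is encoded split as imm3:imm2, and immed_ == 32 (valid only for ASR/LSR, encoded as 0) has bit 5 set, so without `& 7` it would spill out of the 3-bit field. A standalone check of the arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t immed = 32u;               // The ASR/LSR #32 case.
  uint32_t imm3 = (immed >> 2) & 7u;  // 0b000 after masking (was 0b1000).
  uint32_t imm2 = immed & 3u;         // 0b00.
  assert(imm3 == 0u && imm2 == 0u);   // Shift amount 0 encodes #32 for ASR/LSR.
  return 0;
}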
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index ef60fef..7825457 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -375,6 +375,13 @@
   kItE = kItElse
 };
 
+// Set condition codes request.
+enum SetCc {
+  kCcDontCare,  // Allows prioritizing 16-bit instructions on Thumb2 whether they set CCs or not.
+  kCcSet,
+  kCcKeep,
+};
+
 constexpr uint32_t kNoItCondition = 3;
 constexpr uint32_t kInvalidModifiedImmediate = -1;
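
[Editor's note] A hedged usage sketch for the new enum (hypothetical call sites, using the `__` emit convention seen elsewhere in this change): kCcSet forces the flag-setting form, kCcKeep forces the non-setting form (e.g. inside an IT block, where a 16-bit encoding would otherwise implicitly set flags), and kCcDontCare lets the Thumb2 assembler pick whichever encoding is shortest.

__ add(R0, R1, ShifterOperand(R2));               // Default: kCcDontCare.
__ add(R0, R1, ShifterOperand(R2), AL, kCcSet);   // Equivalent to adds().
__ add(R0, R1, ShifterOperand(R2), AL, kCcKeep);  // Never set flags.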
 
@@ -392,25 +399,61 @@
   virtual bool IsThumb() const = 0;
 
   // Data-processing instructions.
-  virtual void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void and_(Register rd, Register rn, const ShifterOperand& so,
+                    Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
-  virtual void eor(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void ands(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    and_(rd, rn, so, cond, kCcSet);
+  }
 
-  virtual void sub(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
-  virtual void subs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void eor(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
-  virtual void rsb(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
-  virtual void rsbs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void eors(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    eor(rd, rn, so, cond, kCcSet);
+  }
 
-  virtual void add(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void sub(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
-  virtual void adds(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void subs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    sub(rd, rn, so, cond, kCcSet);
+  }
 
-  virtual void adc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void rsb(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
-  virtual void sbc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void rsbs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    rsb(rd, rn, so, cond, kCcSet);
+  }
 
-  virtual void rsc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void add(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  virtual void adds(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    add(rd, rn, so, cond, kCcSet);
+  }
+
+  virtual void adc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  virtual void adcs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    adc(rd, rn, so, cond, kCcSet);
+  }
+
+  virtual void sbc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  virtual void sbcs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    sbc(rd, rn, so, cond, kCcSet);
+  }
+
+  virtual void rsc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  virtual void rscs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    rsc(rd, rn, so, cond, kCcSet);
+  }
 
   virtual void tst(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
@@ -420,16 +463,33 @@
 
   virtual void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  virtual void orr(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
-  virtual void orrs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void orr(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
-  virtual void mov(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
-  virtual void movs(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void orrs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    orr(rd, rn, so, cond, kCcSet);
+  }
 
-  virtual void bic(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void mov(Register rd, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
-  virtual void mvn(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
-  virtual void mvns(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void movs(Register rd, const ShifterOperand& so, Condition cond = AL) {
+    mov(rd, so, cond, kCcSet);
+  }
+
+  virtual void bic(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  virtual void bics(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    bic(rd, rn, so, cond, kCcSet);
+  }
+
+  virtual void mvn(Register rd, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  virtual void mvns(Register rd, const ShifterOperand& so, Condition cond = AL) {
+    mvn(rd, so, cond, kCcSet);
+  }
 
   // Miscellaneous data-processing instructions.
   virtual void clz(Register rd, Register rm, Condition cond = AL) = 0;
@@ -697,25 +757,68 @@
 
   // Convenience shift instructions. Use mov instruction with shifter operand
   // for variants setting the status flags or using a register shift count.
-  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-                   Condition cond = AL) = 0;
-  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-                   Condition cond = AL) = 0;
-  virtual void Asr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-                   Condition cond = AL) = 0;
-  virtual void Ror(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-                   Condition cond = AL) = 0;
-  virtual void Rrx(Register rd, Register rm, bool setcc = false,
-                   Condition cond = AL) = 0;
+  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
-  virtual void Lsl(Register rd, Register rm, Register rn, bool setcc = false,
-                   Condition cond = AL) = 0;
-  virtual void Lsr(Register rd, Register rm, Register rn, bool setcc = false,
-                   Condition cond = AL) = 0;
-  virtual void Asr(Register rd, Register rm, Register rn, bool setcc = false,
-                   Condition cond = AL) = 0;
-  virtual void Ror(Register rd, Register rm, Register rn, bool setcc = false,
-                   Condition cond = AL) = 0;
+  void Lsls(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) {
+    Lsl(rd, rm, shift_imm, cond, kCcSet);
+  }
+
+  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Lsrs(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) {
+    Lsr(rd, rm, shift_imm, cond, kCcSet);
+  }
+
+  virtual void Asr(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Asrs(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) {
+    Asr(rd, rm, shift_imm, cond, kCcSet);
+  }
+
+  virtual void Ror(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Rors(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) {
+    Ror(rd, rm, shift_imm, cond, kCcSet);
+  }
+
+  virtual void Rrx(Register rd, Register rm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Rrxs(Register rd, Register rm, Condition cond = AL) {
+    Rrx(rd, rm, cond, kCcSet);
+  }
+
+  virtual void Lsl(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Lsls(Register rd, Register rm, Register rn, Condition cond = AL) {
+    Lsl(rd, rm, rn, cond, kCcSet);
+  }
+
+  virtual void Lsr(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Lsrs(Register rd, Register rm, Register rn, Condition cond = AL) {
+    Lsr(rd, rm, rn, cond, kCcSet);
+  }
+
+  virtual void Asr(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Asrs(Register rd, Register rm, Register rn, Condition cond = AL) {
+    Asr(rd, rm, rn, cond, kCcSet);
+  }
+
+  virtual void Ror(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  void Rors(Register rd, Register rm, Register rn, Condition cond = AL) {
+    Ror(rd, rm, rn, cond, kCcSet);
+  }
 
   // Returns whether the `immediate` can fit in a `ShifterOperand`. If yes,
   // `shifter_op` contains the operand.
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 6e60ddc..d91ddee 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -57,126 +57,94 @@
 }
 
 void Arm32Assembler::and_(Register rd, Register rn, const ShifterOperand& so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), AND, 0, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), AND, set_cc, rn, rd, so);
 }
 
 
 void Arm32Assembler::eor(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), EOR, 0, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), EOR, set_cc, rn, rd, so);
 }
 
 
 void Arm32Assembler::sub(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), SUB, 0, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), SUB, set_cc, rn, rd, so);
 }
 
 void Arm32Assembler::rsb(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), RSB, 0, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), RSB, set_cc, rn, rd, so);
 }
 
-void Arm32Assembler::rsbs(Register rd, Register rn, const ShifterOperand& so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), RSB, 1, rn, rd, so);
-}
-
-
 void Arm32Assembler::add(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), ADD, 0, rn, rd, so);
-}
-
-
-void Arm32Assembler::adds(Register rd, Register rn, const ShifterOperand& so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), ADD, 1, rn, rd, so);
-}
-
-
-void Arm32Assembler::subs(Register rd, Register rn, const ShifterOperand& so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), SUB, 1, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), ADD, set_cc, rn, rd, so);
 }
 
 
 void Arm32Assembler::adc(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), ADC, 0, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), ADC, set_cc, rn, rd, so);
 }
 
 
 void Arm32Assembler::sbc(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), SBC, 0, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), SBC, set_cc, rn, rd, so);
 }
 
 
 void Arm32Assembler::rsc(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), RSC, 0, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), RSC, set_cc, rn, rd, so);
 }
 
 
 void Arm32Assembler::tst(Register rn, const ShifterOperand& so, Condition cond) {
   CHECK_NE(rn, PC);  // Reserve tst pc instruction for exception handler marker.
-  EmitType01(cond, so.type(), TST, 1, rn, R0, so);
+  EmitType01(cond, so.type(), TST, kCcSet, rn, R0, so);
 }
 
 
 void Arm32Assembler::teq(Register rn, const ShifterOperand& so, Condition cond) {
   CHECK_NE(rn, PC);  // Reserve teq pc instruction for exception handler marker.
-  EmitType01(cond, so.type(), TEQ, 1, rn, R0, so);
+  EmitType01(cond, so.type(), TEQ, kCcSet, rn, R0, so);
 }
 
 
 void Arm32Assembler::cmp(Register rn, const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), CMP, 1, rn, R0, so);
+  EmitType01(cond, so.type(), CMP, kCcSet, rn, R0, so);
 }
 
 
 void Arm32Assembler::cmn(Register rn, const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), CMN, 1, rn, R0, so);
+  EmitType01(cond, so.type(), CMN, kCcSet, rn, R0, so);
 }
 
 
-void Arm32Assembler::orr(Register rd, Register rn,
-                    const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), ORR, 0, rn, rd, so);
+void Arm32Assembler::orr(Register rd, Register rn, const ShifterOperand& so,
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), ORR, set_cc, rn, rd, so);
 }
 
 
-void Arm32Assembler::orrs(Register rd, Register rn,
-                        const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), ORR, 1, rn, rd, so);
-}
-
-
-void Arm32Assembler::mov(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), MOV, 0, R0, rd, so);
-}
-
-
-void Arm32Assembler::movs(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), MOV, 1, R0, rd, so);
+void Arm32Assembler::mov(Register rd, const ShifterOperand& so,
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), MOV, set_cc, R0, rd, so);
 }
 
 
 void Arm32Assembler::bic(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), BIC, 0, rn, rd, so);
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), BIC, set_cc, rn, rd, so);
 }
 
 
-void Arm32Assembler::mvn(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), MVN, 0, R0, rd, so);
-}
-
-
-void Arm32Assembler::mvns(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitType01(cond, so.type(), MVN, 1, R0, rd, so);
+void Arm32Assembler::mvn(Register rd, const ShifterOperand& so,
+                         Condition cond, SetCc set_cc) {
+  EmitType01(cond, so.type(), MVN, set_cc, R0, rd, so);
 }
 
 
@@ -573,7 +541,7 @@
 
 
 void Arm32Assembler::MarkExceptionHandler(Label* label) {
-  EmitType01(AL, 1, TST, 1, PC, R0, ShifterOperand(0));
+  EmitType01(AL, 1, TST, kCcSet, PC, R0, ShifterOperand(0));
   Label l;
   b(&l);
   EmitBranch(AL, label, false);
@@ -590,7 +558,7 @@
 void Arm32Assembler::EmitType01(Condition cond,
                                 int type,
                                 Opcode opcode,
-                                int set_cc,
+                                SetCc set_cc,
                                 Register rn,
                                 Register rd,
                                 const ShifterOperand& so) {
@@ -599,7 +567,7 @@
   int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
                      type << kTypeShift |
                      static_cast<int32_t>(opcode) << kOpcodeShift |
-                     set_cc << kSShift |
+                     (set_cc == kCcSet ? 1 : 0) << kSShift |
                      static_cast<int32_t>(rn) << kRnShift |
                      static_cast<int32_t>(rd) << kRdShift |
                      so.encodingArm();
@@ -1158,96 +1126,60 @@
 
 
 void Arm32Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
-                         bool setcc, Condition cond) {
+                         Condition cond, SetCc set_cc) {
   CHECK_LE(shift_imm, 31u);
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, LSL, shift_imm), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, LSL, shift_imm), cond);
-  }
+  mov(rd, ShifterOperand(rm, LSL, shift_imm), cond, set_cc);
 }
 
 
 void Arm32Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
-                         bool setcc, Condition cond) {
+                         Condition cond, SetCc set_cc) {
   CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply with UAL syntax.
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, LSR, shift_imm), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, LSR, shift_imm), cond);
-  }
+  mov(rd, ShifterOperand(rm, LSR, shift_imm), cond, set_cc);
 }
 
 
 void Arm32Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
-                         bool setcc, Condition cond) {
+                         Condition cond, SetCc set_cc) {
   CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply with UAL syntax.
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, ASR, shift_imm), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, ASR, shift_imm), cond);
-  }
+  mov(rd, ShifterOperand(rm, ASR, shift_imm), cond, set_cc);
 }
 
 
 void Arm32Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
-                         bool setcc, Condition cond) {
+                         Condition cond, SetCc set_cc) {
   CHECK(1u <= shift_imm && shift_imm <= 31u);
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, ROR, shift_imm), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, ROR, shift_imm), cond);
-  }
+  mov(rd, ShifterOperand(rm, ROR, shift_imm), cond, set_cc);
 }
 
-void Arm32Assembler::Rrx(Register rd, Register rm, bool setcc, Condition cond) {
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, ROR, 0), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, ROR, 0), cond);
-  }
+void Arm32Assembler::Rrx(Register rd, Register rm, Condition cond, SetCc set_cc) {
+  mov(rd, ShifterOperand(rm, ROR, 0), cond, set_cc);
 }
 
 
 void Arm32Assembler::Lsl(Register rd, Register rm, Register rn,
-                         bool setcc, Condition cond) {
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, LSL, rn), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, LSL, rn), cond);
-  }
+                         Condition cond, SetCc set_cc) {
+  mov(rd, ShifterOperand(rm, LSL, rn), cond, set_cc);
 }
 
 
 void Arm32Assembler::Lsr(Register rd, Register rm, Register rn,
-                         bool setcc, Condition cond) {
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, LSR, rn), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, LSR, rn), cond);
-  }
+                         Condition cond, SetCc set_cc) {
+  mov(rd, ShifterOperand(rm, LSR, rn), cond, set_cc);
 }
 
 
 void Arm32Assembler::Asr(Register rd, Register rm, Register rn,
-                         bool setcc, Condition cond) {
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, ASR, rn), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, ASR, rn), cond);
-  }
+                         Condition cond, SetCc set_cc) {
+  mov(rd, ShifterOperand(rm, ASR, rn), cond, set_cc);
 }
 
 
 void Arm32Assembler::Ror(Register rd, Register rm, Register rn,
-                         bool setcc, Condition cond) {
-  if (setcc) {
-    movs(rd, ShifterOperand(rm, ROR, rn), cond);
-  } else {
-    mov(rd, ShifterOperand(rm, ROR, rn), cond);
-  }
+                         Condition cond, SetCc set_cc) {
+  mov(rd, ShifterOperand(rm, ROR, rn), cond, set_cc);
 }
 
 void Arm32Assembler::vmstat(Condition cond) {  // VMRS APSR_nzcv, FPSCR
@@ -1434,24 +1366,24 @@
                                          Condition cond) {
   ShifterOperand shifter_op;
   if (ShifterOperandCanHoldArm32(value, &shifter_op)) {
-    adds(rd, rn, shifter_op, cond);
+    add(rd, rn, shifter_op, cond, kCcSet);
   } else if (ShifterOperandCanHoldArm32(-value, &shifter_op)) {
-    subs(rd, rn, shifter_op, cond);
+    sub(rd, rn, shifter_op, cond, kCcSet);
   } else {
     CHECK(rn != IP);
     if (ShifterOperandCanHoldArm32(~value, &shifter_op)) {
       mvn(IP, shifter_op, cond);
-      adds(rd, rn, ShifterOperand(IP), cond);
+      add(rd, rn, ShifterOperand(IP), cond, kCcSet);
     } else if (ShifterOperandCanHoldArm32(~(-value), &shifter_op)) {
       mvn(IP, shifter_op, cond);
-      subs(rd, rn, ShifterOperand(IP), cond);
+      sub(rd, rn, ShifterOperand(IP), cond, kCcSet);
     } else {
       movw(IP, Low16Bits(value), cond);
       uint16_t value_high = High16Bits(value);
       if (value_high != 0) {
         movt(IP, value_high, cond);
       }
-      adds(rd, rn, ShifterOperand(IP), cond);
+      add(rd, rn, ShifterOperand(IP), cond, kCcSet);
     }
   }
 }
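
[Editor's note] In the constant-add helper above, the adds/subs convenience wrappers are replaced by explicit kCcSet arguments, so every emission path states its flag behavior uniformly. Sketch of the fallback chain for a hypothetical value that fits no ShifterOperand form (the constant is materialized in IP first):

__ movw(IP, 0x5678);
__ movt(IP, 0x1234);
__ add(rd, rn, ShifterOperand(IP), cond, kCcSet);  // Was: adds(rd, rn, ShifterOperand(IP), cond).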
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 1c38eec..b96bb74 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -39,25 +39,29 @@
   }
 
   // Data-processing instructions.
-  void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void and_(Register rd, Register rn, const ShifterOperand& so,
+                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void eor(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void eor(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void sub(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void subs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void sub(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void rsb(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void rsbs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void rsb(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void add(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void add(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void adds(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void adc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void adc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void sbc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void sbc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-
-  void rsc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void rsc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
   void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
 
@@ -67,16 +71,17 @@
 
   void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
 
-  void orr(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void orrs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void orr(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void mov(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void movs(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void mov(Register rd, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void bic(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void bic(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void mvn(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void mvns(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void mvn(Register rd, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
   // Miscellaneous data-processing instructions.
   void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
@@ -204,25 +209,25 @@
   void bl(Label* label, Condition cond = AL) OVERRIDE;
   void blx(Register rm, Condition cond = AL) OVERRIDE;
   void bx(Register rm, Condition cond = AL) OVERRIDE;
-  void Lsl(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Lsr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Asr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Ror(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Rrx(Register rd, Register rm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
+  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Asr(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Ror(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Rrx(Register rd, Register rm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void Lsl(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Lsr(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Asr(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Ror(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
+  virtual void Lsl(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Lsr(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Asr(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Ror(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
   void Push(Register rd, Condition cond = AL) OVERRIDE;
   void Pop(Register rd, Condition cond = AL) OVERRIDE;
@@ -305,7 +310,7 @@
   void EmitType01(Condition cond,
                   int type,
                   Opcode opcode,
-                  int set_cc,
+                  SetCc set_cc,
                   Register rn,
                   Register rd,
                   const ShifterOperand& so);
diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc
index efd517b..e6412ac 100644
--- a/compiler/utils/arm/assembler_arm32_test.cc
+++ b/compiler/utils/arm/assembler_arm32_test.cc
@@ -42,7 +42,8 @@
 
 class AssemblerArm32Test : public AssemblerArmTest<arm::Arm32Assembler,
                                                    arm::Register, arm::SRegister,
-                                                   uint32_t, arm::ShifterOperand, arm::Condition> {
+                                                   uint32_t, arm::ShifterOperand, arm::Condition,
+                                                   arm::SetCc> {
  protected:
   std::string GetArchitectureString() OVERRIDE {
     return "arm";
@@ -125,6 +126,10 @@
       conditions_.push_back(arm::Condition::AL);
     }
 
+    set_ccs_.push_back(arm::kCcDontCare);
+    set_ccs_.push_back(arm::kCcSet);
+    set_ccs_.push_back(arm::kCcKeep);
+
     shifter_operands_.push_back(arm::ShifterOperand(0));
     shifter_operands_.push_back(arm::ShifterOperand(1));
     shifter_operands_.push_back(arm::ShifterOperand(2));
@@ -240,6 +245,15 @@
     return oss.str();
   }
 
+  std::vector<arm::SetCc>& GetSetCcs() OVERRIDE {
+    return set_ccs_;
+  }
+
+  std::string GetSetCcString(arm::SetCc s) OVERRIDE {
+    // For arm32, kCcDontCare defaults to not setting condition codes.
+    return s == arm::kCcSet ? "s" : "";
+  }
+
   arm::Register GetPCRegister() OVERRIDE {
     return arm::R15;
   }
@@ -369,12 +383,12 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       cond_index = after_cond_filter.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond_filter.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond_filter.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
       if (EvalFilterString(after_cond_filter)) {
         continue;
@@ -384,6 +398,30 @@
     }
   }
 
+  void TemplateHelper(std::function<void(arm::SetCc)> f, int depth ATTRIBUTE_UNUSED,
+                      bool without_pc ATTRIBUTE_UNUSED, std::string fmt, std::string filter,
+                      std::ostringstream& oss) {
+    for (arm::SetCc s : GetSetCcs()) {
+      std::string after_cond = fmt;
+      std::string after_cond_filter = filter;
+
+      size_t cond_index = after_cond.find(SET_CC_TOKEN);
+      if (cond_index != std::string::npos) {
+        after_cond.replace(cond_index, ConstexprStrLen(SET_CC_TOKEN), GetSetCcString(s));
+      }
+
+      cond_index = after_cond_filter.find(SET_CC_TOKEN);
+      if (cond_index != std::string::npos) {
+        after_cond_filter.replace(cond_index, ConstexprStrLen(SET_CC_TOKEN), GetSetCcString(s));
+      }
+      if (EvalFilterString(after_cond_filter)) {
+        continue;
+      }
+
+      ExecuteAndPrint([&] () { f(s); }, after_cond, oss);
+    }
+  }
+
   template <typename... Args>
   void TemplateHelper(std::function<void(arm::Register, Args...)> f, int depth, bool without_pc,
                       std::string fmt, std::string filter, std::ostringstream& oss) {
@@ -449,12 +487,12 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       cond_index = after_cond_filter.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond_filter.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond_filter.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
       if (EvalFilterString(after_cond_filter)) {
         continue;
@@ -466,25 +504,51 @@
     }
   }
 
-  template <typename T1, typename T2>
-  std::function<void(T1, T2)> GetBoundFunction2(void (arm::Arm32Assembler::*f)(T1, T2)) {
+  template <typename... Args>
+  void TemplateHelper(std::function<void(arm::SetCc, Args...)> f, int depth, bool without_pc,
+                      std::string fmt, std::string filter, std::ostringstream& oss) {
+    for (arm::SetCc s : GetSetCcs()) {
+      std::string after_cond = fmt;
+      std::string after_cond_filter = filter;
+
+      size_t cond_index = after_cond.find(SET_CC_TOKEN);
+      if (cond_index != std::string::npos) {
+        after_cond.replace(cond_index, ConstexprStrLen(SET_CC_TOKEN), GetSetCcString(s));
+      }
+
+      cond_index = after_cond_filter.find(SET_CC_TOKEN);
+      if (cond_index != std::string::npos) {
+        after_cond_filter.replace(cond_index, ConstexprStrLen(SET_CC_TOKEN), GetSetCcString(s));
+      }
+      if (EvalFilterString(after_cond_filter)) {
+        continue;
+      }
+
+      auto lambda = [&] (Args... args) { f(s, args...); };  // NOLINT [readability/braces] [4]
+      TemplateHelper(std::function<void(Args...)>(lambda), depth, without_pc,
+          after_cond, after_cond_filter, oss);
+    }
+  }
+
+  template <typename Assembler, typename T1, typename T2>
+  std::function<void(T1, T2)> GetBoundFunction2(void (Assembler::*f)(T1, T2)) {
     return std::bind(f, GetAssembler(), _1, _2);
   }
 
-  template <typename T1, typename T2, typename T3>
-  std::function<void(T1, T2, T3)> GetBoundFunction3(void (arm::Arm32Assembler::*f)(T1, T2, T3)) {
+  template <typename Assembler, typename T1, typename T2, typename T3>
+  std::function<void(T1, T2, T3)> GetBoundFunction3(void (Assembler::*f)(T1, T2, T3)) {
     return std::bind(f, GetAssembler(), _1, _2, _3);
   }
 
-  template <typename T1, typename T2, typename T3, typename T4>
+  template <typename Assembler, typename T1, typename T2, typename T3, typename T4>
   std::function<void(T1, T2, T3, T4)> GetBoundFunction4(
-      void (arm::Arm32Assembler::*f)(T1, T2, T3, T4)) {
+      void (Assembler::*f)(T1, T2, T3, T4)) {
     return std::bind(f, GetAssembler(), _1, _2, _3, _4);
   }
 
-  template <typename T1, typename T2, typename T3, typename T4, typename T5>
+  template <typename Assembler, typename T1, typename T2, typename T3, typename T4, typename T5>
   std::function<void(T1, T2, T3, T4, T5)> GetBoundFunction5(
-      void (arm::Arm32Assembler::*f)(T1, T2, T3, T4, T5)) {
+      void (Assembler::*f)(T1, T2, T3, T4, T5)) {
     return std::bind(f, GetAssembler(), _1, _2, _3, _4, _5);
   }
 
@@ -503,26 +567,26 @@
     DriverStr(oss.str(), test_name);
   }
 
-  template <typename... Args>
-  void T2Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
+  template <typename Assembler, typename... Args>
+  void T2Helper(void (Assembler::*f)(Args...), bool without_pc, std::string fmt,
                 std::string test_name, std::string filter = "") {
     GenericTemplateHelper(GetBoundFunction2(f), without_pc, fmt, test_name, filter);
   }
 
-  template <typename... Args>
-  void T3Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
+  template <typename Assembler, typename... Args>
+  void T3Helper(void (Assembler::*f)(Args...), bool without_pc, std::string fmt,
       std::string test_name, std::string filter = "") {
     GenericTemplateHelper(GetBoundFunction3(f), without_pc, fmt, test_name, filter);
   }
 
-  template <typename... Args>
-  void T4Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
+  template <typename Assembler, typename... Args>
+  void T4Helper(void (Assembler::*f)(Args...), bool without_pc, std::string fmt,
       std::string test_name, std::string filter = "") {
     GenericTemplateHelper(GetBoundFunction4(f), without_pc, fmt, test_name, filter);
   }
 
-  template <typename... Args>
-  void T5Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
+  template <typename Assembler, typename... Args>
+  void T5Helper(void (Assembler::*f)(Args...), bool without_pc, std::string fmt,
       std::string test_name, std::string filter = "") {
     GenericTemplateHelper(GetBoundFunction5(f), without_pc, fmt, test_name, filter);
   }
@@ -573,6 +637,7 @@
 
   std::vector<arm::Register*> registers_;
   std::vector<arm::Condition> conditions_;
+  std::vector<arm::SetCc> set_ccs_;
   std::vector<arm::ShifterOperand> shifter_operands_;
 };
 
@@ -656,15 +721,23 @@
 }
 
 TEST_F(AssemblerArm32Test, And) {
-  T4Helper(&arm::Arm32Assembler::and_, true, "and{cond} {reg1}, {reg2}, {shift}", "and");
+  T5Helper(&arm::Arm32Assembler::and_, true, "and{cond}{s} {reg1}, {reg2}, {shift}", "and");
+}
+
+TEST_F(AssemblerArm32Test, Ands) {
+  T4Helper(&arm::Arm32Assembler::ands, true, "and{cond}s {reg1}, {reg2}, {shift}", "ands");
 }
 
 TEST_F(AssemblerArm32Test, Eor) {
-  T4Helper(&arm::Arm32Assembler::eor, true, "eor{cond} {reg1}, {reg2}, {shift}", "eor");
+  T5Helper(&arm::Arm32Assembler::eor, true, "eor{cond}{s} {reg1}, {reg2}, {shift}", "eor");
+}
+
+TEST_F(AssemblerArm32Test, Eors) {
+  T4Helper(&arm::Arm32Assembler::eors, true, "eor{cond}s {reg1}, {reg2}, {shift}", "eors");
 }
 
 TEST_F(AssemblerArm32Test, Orr) {
-  T4Helper(&arm::Arm32Assembler::orr, true, "orr{cond} {reg1}, {reg2}, {shift}", "orr");
+  T5Helper(&arm::Arm32Assembler::orr, true, "orr{cond}{s} {reg1}, {reg2}, {shift}", "orr");
 }
 
 TEST_F(AssemblerArm32Test, Orrs) {
@@ -672,11 +745,15 @@
 }
 
 TEST_F(AssemblerArm32Test, Bic) {
-  T4Helper(&arm::Arm32Assembler::bic, true, "bic{cond} {reg1}, {reg2}, {shift}", "bic");
+  T5Helper(&arm::Arm32Assembler::bic, true, "bic{cond}{s} {reg1}, {reg2}, {shift}", "bic");
+}
+
+TEST_F(AssemblerArm32Test, Bics) {
+  T4Helper(&arm::Arm32Assembler::bics, true, "bic{cond}s {reg1}, {reg2}, {shift}", "bics");
 }
 
 TEST_F(AssemblerArm32Test, Mov) {
-  T3Helper(&arm::Arm32Assembler::mov, true, "mov{cond} {reg1}, {shift}", "mov");
+  T4Helper(&arm::Arm32Assembler::mov, true, "mov{cond}{s} {reg1}, {shift}", "mov");
 }
 
 TEST_F(AssemblerArm32Test, Movs) {
@@ -684,7 +761,7 @@
 }
 
 TEST_F(AssemblerArm32Test, Mvn) {
-  T3Helper(&arm::Arm32Assembler::mvn, true, "mvn{cond} {reg1}, {shift}", "mvn");
+  T4Helper(&arm::Arm32Assembler::mvn, true, "mvn{cond}{s} {reg1}, {shift}", "mvn");
 }
 
 TEST_F(AssemblerArm32Test, Mvns) {
@@ -692,7 +769,7 @@
 }
 
 TEST_F(AssemblerArm32Test, Add) {
-  T4Helper(&arm::Arm32Assembler::add, false, "add{cond} {reg1}, {reg2}, {shift}", "add");
+  T5Helper(&arm::Arm32Assembler::add, false, "add{cond}{s} {reg1}, {reg2}, {shift}", "add");
 }
 
 TEST_F(AssemblerArm32Test, Adds) {
@@ -700,11 +777,15 @@
 }
 
 TEST_F(AssemblerArm32Test, Adc) {
-  T4Helper(&arm::Arm32Assembler::adc, false, "adc{cond} {reg1}, {reg2}, {shift}", "adc");
+  T5Helper(&arm::Arm32Assembler::adc, false, "adc{cond}{s} {reg1}, {reg2}, {shift}", "adc");
+}
+
+TEST_F(AssemblerArm32Test, Adcs) {
+  T4Helper(&arm::Arm32Assembler::adcs, false, "adc{cond}s {reg1}, {reg2}, {shift}", "adcs");
 }
 
 TEST_F(AssemblerArm32Test, Sub) {
-  T4Helper(&arm::Arm32Assembler::sub, false, "sub{cond} {reg1}, {reg2}, {shift}", "sub");
+  T5Helper(&arm::Arm32Assembler::sub, false, "sub{cond}{s} {reg1}, {reg2}, {shift}", "sub");
 }
 
 TEST_F(AssemblerArm32Test, Subs) {
@@ -712,11 +793,15 @@
 }
 
 TEST_F(AssemblerArm32Test, Sbc) {
-  T4Helper(&arm::Arm32Assembler::sbc, false, "sbc{cond} {reg1}, {reg2}, {shift}", "sbc");
+  T5Helper(&arm::Arm32Assembler::sbc, false, "sbc{cond}{s} {reg1}, {reg2}, {shift}", "sbc");
+}
+
+TEST_F(AssemblerArm32Test, Sbcs) {
+  T4Helper(&arm::Arm32Assembler::sbcs, false, "sbc{cond}s {reg1}, {reg2}, {shift}", "sbcs");
 }
 
 TEST_F(AssemblerArm32Test, Rsb) {
-  T4Helper(&arm::Arm32Assembler::rsb, true, "rsb{cond} {reg1}, {reg2}, {shift}", "rsb");
+  T5Helper(&arm::Arm32Assembler::rsb, true, "rsb{cond}{s} {reg1}, {reg2}, {shift}", "rsb");
 }
 
 TEST_F(AssemblerArm32Test, Rsbs) {
@@ -724,7 +809,11 @@
 }
 
 TEST_F(AssemblerArm32Test, Rsc) {
-  T4Helper(&arm::Arm32Assembler::rsc, true, "rsc{cond} {reg1}, {reg2}, {shift}", "rsc");
+  T5Helper(&arm::Arm32Assembler::rsc, true, "rsc{cond}{s} {reg1}, {reg2}, {shift}", "rsc");
+}
+
+TEST_F(AssemblerArm32Test, Rscs) {
+  T4Helper(&arm::Arm32Assembler::rscs, false, "rsc{cond}s {reg1}, {reg2}, {shift}", "rscs");
 }
 
 /* TODO: Need better filter support.
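
[Editor's note] The tests above also fix a latent token-length bug: the replace calls measured IMM1_TOKEN ("{imm1}") when substituting COND_TOKEN ("{cond}"), which was likely benign only because both tokens happen to be six characters. A hedged standalone sketch of how a format string expands with the new {s} token (hypothetical values; for arm32, GetSetCcString() maps kCcSet to "s" and both kCcDontCare and kCcKeep to ""):

#include <iostream>
#include <string>

int main() {
  std::string fmt = "add{cond}{s} {reg1}, {reg2}, {shift}";
  auto replace = [&](const std::string& token, const std::string& value) {
    size_t pos = fmt.find(token);
    if (pos != std::string::npos) fmt.replace(pos, token.length(), value);
  };
  replace("{cond}", "eq");
  replace("{s}", "s");       // kCcSet -> "s"; kCcDontCare/kCcKeep -> "".
  replace("{reg1}", "r0");
  replace("{reg2}", "r1");
  replace("{shift}", "r2, lsl #1");
  std::cout << fmt << '\n';  // Prints: addeqs r0, r1, r2, lsl #1
  return 0;
}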
diff --git a/compiler/utils/arm/assembler_arm_test.h b/compiler/utils/arm/assembler_arm_test.h
index 838abb6..a85a05e 100644
--- a/compiler/utils/arm/assembler_arm_test.h
+++ b/compiler/utils/arm/assembler_arm_test.h
@@ -21,7 +21,13 @@
 
 namespace art {
 
-template<typename Ass, typename Reg, typename FPReg, typename Imm, typename SOp, typename Cond>
+template<typename Ass,
+         typename Reg,
+         typename FPReg,
+         typename Imm,
+         typename SOp,
+         typename Cond,
+         typename SetCc>
 class AssemblerArmTest : public AssemblerTest<Ass, Reg, FPReg, Imm> {
  public:
   typedef AssemblerTest<Ass, Reg, FPReg, Imm> Base;
@@ -94,7 +100,7 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       for (Imm i : immediates1) {
@@ -185,7 +191,7 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       for (std::pair<Imm, Imm>& pair : immediates) {
@@ -271,7 +277,7 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       for (auto reg1 : reg1_registers) {
@@ -337,7 +343,7 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       for (auto reg1 : reg1_registers) {
@@ -401,7 +407,7 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       for (const SOp& shift : shifts) {
@@ -457,7 +463,7 @@
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
-        after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+        after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c));
       }
 
       for (const SOp& shift : shifts) {
@@ -511,6 +517,9 @@
   virtual std::vector<Cond>& GetConditions() = 0;
   virtual std::string GetConditionString(Cond c) = 0;
 
+  virtual std::vector<SetCc>& GetSetCcs() = 0;
+  virtual std::string GetSetCcString(SetCc s) = 0;
+
   virtual std::vector<SOp>& GetShiftOperands() = 0;
   virtual std::string GetShiftString(SOp sop) = 0;
 
@@ -534,6 +543,7 @@
   static constexpr const char* REG3_TOKEN = "{reg3}";
   static constexpr const char* REG4_TOKEN = "{reg4}";
   static constexpr const char* COND_TOKEN = "{cond}";
+  static constexpr const char* SET_CC_TOKEN = "{s}";
   static constexpr const char* SHIFT_TOKEN = "{shift}";
 
  private:
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 4e918e9..90ed10c 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -417,128 +417,96 @@
 }
 
 void Thumb2Assembler::and_(Register rd, Register rn, const ShifterOperand& so,
-                           Condition cond) {
-  EmitDataProcessing(cond, AND, 0, rn, rd, so);
+                           Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, AND, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::eor(Register rd, Register rn, const ShifterOperand& so,
-                          Condition cond) {
-  EmitDataProcessing(cond, EOR, 0, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, EOR, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::sub(Register rd, Register rn, const ShifterOperand& so,
-                          Condition cond) {
-  EmitDataProcessing(cond, SUB, 0, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, SUB, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::rsb(Register rd, Register rn, const ShifterOperand& so,
-                          Condition cond) {
-  EmitDataProcessing(cond, RSB, 0, rn, rd, so);
-}
-
-
-void Thumb2Assembler::rsbs(Register rd, Register rn, const ShifterOperand& so,
-                           Condition cond) {
-  EmitDataProcessing(cond, RSB, 1, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, RSB, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::add(Register rd, Register rn, const ShifterOperand& so,
-                          Condition cond) {
-  EmitDataProcessing(cond, ADD, 0, rn, rd, so);
-}
-
-
-void Thumb2Assembler::adds(Register rd, Register rn, const ShifterOperand& so,
-                           Condition cond) {
-  EmitDataProcessing(cond, ADD, 1, rn, rd, so);
-}
-
-
-void Thumb2Assembler::subs(Register rd, Register rn, const ShifterOperand& so,
-                           Condition cond) {
-  EmitDataProcessing(cond, SUB, 1, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, ADD, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::adc(Register rd, Register rn, const ShifterOperand& so,
-                          Condition cond) {
-  EmitDataProcessing(cond, ADC, 0, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, ADC, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::sbc(Register rd, Register rn, const ShifterOperand& so,
-                          Condition cond) {
-  EmitDataProcessing(cond, SBC, 0, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, SBC, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::rsc(Register rd, Register rn, const ShifterOperand& so,
-                          Condition cond) {
-  EmitDataProcessing(cond, RSC, 0, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, RSC, set_cc, rn, rd, so);
 }
 
 
 void Thumb2Assembler::tst(Register rn, const ShifterOperand& so, Condition cond) {
   CHECK_NE(rn, PC);  // Reserve tst pc instruction for exception handler marker.
-  EmitDataProcessing(cond, TST, 1, rn, R0, so);
+  EmitDataProcessing(cond, TST, kCcSet, rn, R0, so);
 }
 
 
 void Thumb2Assembler::teq(Register rn, const ShifterOperand& so, Condition cond) {
   CHECK_NE(rn, PC);  // Reserve teq pc instruction for exception handler marker.
-  EmitDataProcessing(cond, TEQ, 1, rn, R0, so);
+  EmitDataProcessing(cond, TEQ, kCcSet, rn, R0, so);
 }
 
 
 void Thumb2Assembler::cmp(Register rn, const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, CMP, 1, rn, R0, so);
+  EmitDataProcessing(cond, CMP, kCcSet, rn, R0, so);
 }
 
 
 void Thumb2Assembler::cmn(Register rn, const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, CMN, 1, rn, R0, so);
+  EmitDataProcessing(cond, CMN, kCcSet, rn, R0, so);
 }
 
 
-void Thumb2Assembler::orr(Register rd, Register rn,
-                          const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, ORR, 0, rn, rd, so);
+void Thumb2Assembler::orr(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, ORR, set_cc, rn, rd, so);
 }
 
 
-void Thumb2Assembler::orrs(Register rd, Register rn,
-                           const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, ORR, 1, rn, rd, so);
-}
-
-
-void Thumb2Assembler::mov(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, MOV, 0, R0, rd, so);
-}
-
-
-void Thumb2Assembler::movs(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, MOV, 1, R0, rd, so);
+void Thumb2Assembler::mov(Register rd, const ShifterOperand& so,
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, MOV, set_cc, R0, rd, so);
 }
 
 
 void Thumb2Assembler::bic(Register rd, Register rn, const ShifterOperand& so,
-                       Condition cond) {
-  EmitDataProcessing(cond, BIC, 0, rn, rd, so);
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, BIC, set_cc, rn, rd, so);
 }
 
 
-void Thumb2Assembler::mvn(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, MVN, 0, R0, rd, so);
-}
-
-
-void Thumb2Assembler::mvns(Register rd, const ShifterOperand& so, Condition cond) {
-  EmitDataProcessing(cond, MVN, 1, R0, rd, so);
+void Thumb2Assembler::mvn(Register rd, const ShifterOperand& so,
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, MVN, set_cc, R0, rd, so);
 }
 
 
@@ -1054,7 +1022,7 @@
 
 
 void Thumb2Assembler::MarkExceptionHandler(Label* label) {
-  EmitDataProcessing(AL, TST, 1, PC, R0, ShifterOperand(0));
+  EmitDataProcessing(AL, TST, kCcSet, PC, R0, ShifterOperand(0));
   Label l;
   b(&l);
   EmitBranch(AL, label, false, false);
@@ -1075,9 +1043,9 @@
 }
 
 
-bool Thumb2Assembler::Is32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED,
+bool Thumb2Assembler::Is32BitDataProcessing(Condition cond,
                                             Opcode opcode,
-                                            bool set_cc,
+                                            SetCc set_cc,
                                             Register rn,
                                             Register rd,
                                             const ShifterOperand& so) {
@@ -1086,7 +1054,7 @@
   }
 
   // Check special case for SP relative ADD and SUB immediate.
-  if ((opcode == ADD || opcode == SUB) && rn == SP && so.IsImmediate()) {
+  if ((opcode == ADD || opcode == SUB) && rn == SP && so.IsImmediate() && set_cc != kCcSet) {
     // If the immediate is in range, use 16 bit.
     if (rd == SP) {
       if (so.GetImmediate() < (1 << 9)) {    // 9 bit immediate.
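+        // The 16-bit encoding stores the immediate as imm >> 2 in 7 bits, hence
+        // the (1 << 9) bound; word alignment is checked in Emit16BitAddSub.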
@@ -1099,8 +1067,10 @@
     }
   }
 
-  bool can_contain_high_register = (opcode == MOV)
-      || ((opcode == ADD) && (rn == rd) && !set_cc);
+  bool can_contain_high_register =
+      (opcode == CMP) ||
+      (opcode == MOV && set_cc != kCcSet) ||
+      ((opcode == ADD) && (rn == rd) && set_cc != kCcSet);
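+  // E.g. there are 16-bit encodings for "cmp r8, r1" (4588) and "mov r8, r9"
+  // (46c8), but a flag-setting "ands r0, r8" must use the 32-bit encoding.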
 
   if (IsHighRegister(rd) || IsHighRegister(rn)) {
     if (!can_contain_high_register) {
@@ -1146,39 +1116,80 @@
   }
 
   if (so.IsImmediate()) {
-    if (rn_is_valid && rn != rd) {
-      // The only thumb1 instruction with a register and an immediate are ADD and SUB.  The
-      // immediate must be 3 bits.
-      if (opcode != ADD && opcode != SUB) {
+    if (opcode == RSB) {
+      DCHECK(rn_is_valid);
+      if (so.GetImmediate() != 0u) {
         return true;
-      } else {
-        // Check that the immediate is 3 bits for ADD and SUB.
-        if (so.GetImmediate() >= 8) {
+      }
+    } else if (rn_is_valid && rn != rd) {
+      // The only thumb1 instructions with a register and an immediate are ADD and SUB
+      // with a 3-bit immediate, and RSB with zero immediate.
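+      // E.g. "adds r0, r1, #5" fits in 16 bits (1d48); immediates of 8 or more
+      // force the 32-bit encoding.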
+      if (opcode == ADD || opcode == SUB) {
+        if (!IsUint<3>(so.GetImmediate())) {
           return true;
         }
+      } else {
+        return true;
       }
     } else {
       // ADD, SUB, CMP and MOV may be thumb1 only if the immediate is 8 bits.
       if (!(opcode == ADD || opcode == SUB || opcode == MOV || opcode == CMP)) {
         return true;
       } else {
-        if (so.GetImmediate() > 255) {
+        if (!IsUint<8>(so.GetImmediate())) {
           return true;
         }
       }
     }
-  }
-
-  // Check for register shift operand.
-  if (so.IsRegister() && so.IsShift()) {
-    if (opcode != MOV) {
-      return true;
-    }
-    // Check for MOV with an ROR.
-    if (so.GetShift() == ROR) {
-      if (so.GetImmediate() != 0) {
+  } else {
+    DCHECK(so.IsRegister());
+    if (so.IsShift()) {
+      // Shift operand - check if it is a MOV convertible to a 16-bit shift instruction.
+      if (opcode != MOV) {
         return true;
       }
+      // Check for MOV with an ROR/RRX. There is no 16-bit ROR immediate and no 16-bit RRX.
+      if (so.GetShift() == ROR || so.GetShift() == RRX) {
+        return true;
+      }
+      // 16-bit shifts set condition codes if and only if outside IT block,
+      // i.e. if and only if cond == AL.
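+      // E.g. "movs r3, r4, lsl #4" has a 16-bit encoding (0123, lsls), but the
+      // kCcKeep form must use the 32-bit "mov.w r3, r4, lsl #4" (ea4f 1304).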
+      if ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet) {
+        return true;
+      }
+    } else {
+      // Register operand without shift.
+      switch (opcode) {
+        case ADD:
+          // The 16-bit ADD that cannot contain high registers can set condition codes
+          // if and only if outside IT block, i.e. if and only if cond == AL.
+          if (!can_contain_high_register &&
+              ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet)) {
+            return true;
+          }
+          break;
+        case AND:
+        case BIC:
+        case EOR:
+        case ORR:
+        case MVN:
+        case ADC:
+        case SUB:
+        case SBC:
+          // These 16-bit opcodes set condition codes if and only if outside IT block,
+          // i.e. if and only if cond == AL.
+          if ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet) {
+            return true;
+          }
+          break;
+        case RSB:
+        case RSC:
+          // No 16-bit RSB/RSC Rd, Rm, Rn. It would be equivalent to SUB/SBC Rd, Rn, Rm.
+          return true;
+        case CMP:
+        default:
+          break;
+      }
     }
   }
 
@@ -1189,7 +1200,7 @@
 
 void Thumb2Assembler::Emit32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED,
                                               Opcode opcode,
-                                              bool set_cc,
+                                              SetCc set_cc,
                                               Register rn,
                                               Register rd,
                                               const ShifterOperand& so) {
@@ -1203,10 +1214,10 @@
     case ADC: thumb_opcode = 10U /* 0b1010 */; break;
     case SBC: thumb_opcode = 11U /* 0b1011 */; break;
     case RSC: break;
-    case TST: thumb_opcode =  0U /* 0b0000 */; set_cc = true; rd = PC; break;
-    case TEQ: thumb_opcode =  4U /* 0b0100 */; set_cc = true; rd = PC; break;
-    case CMP: thumb_opcode = 13U /* 0b1101 */; set_cc = true; rd = PC; break;
-    case CMN: thumb_opcode =  8U /* 0b1000 */; set_cc = true; rd = PC; break;
+    case TST: thumb_opcode =  0U /* 0b0000 */; DCHECK(set_cc == kCcSet); rd = PC; break;
+    case TEQ: thumb_opcode =  4U /* 0b0100 */; DCHECK(set_cc == kCcSet); rd = PC; break;
+    case CMP: thumb_opcode = 13U /* 0b1101 */; DCHECK(set_cc == kCcSet); rd = PC; break;
+    case CMN: thumb_opcode =  8U /* 0b1000 */; DCHECK(set_cc == kCcSet); rd = PC; break;
     case ORR: thumb_opcode =  2U /* 0b0010 */; break;
     case MOV: thumb_opcode =  2U /* 0b0010 */; rn = PC; break;
     case BIC: thumb_opcode =  1U /* 0b0001 */; break;
@@ -1224,7 +1235,7 @@
   if (so.IsImmediate()) {
     // Check special cases.
     if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12))) {
-      if (!set_cc) {
+      if (set_cc != kCcSet) {
         if (opcode == SUB) {
           thumb_opcode = 5U;
         } else if (opcode == ADD) {
@@ -1238,7 +1249,7 @@
       uint32_t imm8 = imm & 0xff;
 
       encoding = B31 | B30 | B29 | B28 |
-          (set_cc ? B20 : B25) |
+          (set_cc == kCcSet ? B20 : B25) |
           thumb_opcode << 21 |
           rn << 16 |
           rd << 8 |
@@ -1254,7 +1265,7 @@
       }
       encoding = B31 | B30 | B29 | B28 |
           thumb_opcode << 21 |
-          (set_cc ? B20 : 0) |
+          (set_cc == kCcSet ? B20 : 0) |
           rn << 16 |
           rd << 8 |
           imm;
@@ -1263,7 +1274,7 @@
     // Register (possibly shifted)
     encoding = B31 | B30 | B29 | B27 | B25 |
         thumb_opcode << 21 |
-        (set_cc ? B20 : 0) |
+        (set_cc == kCcSet ? B20 : 0) |
         rn << 16 |
         rd << 8 |
         so.encodingThumb();
@@ -1274,7 +1285,7 @@
 
 void Thumb2Assembler::Emit16BitDataProcessing(Condition cond,
                                               Opcode opcode,
-                                              bool set_cc,
+                                              SetCc set_cc,
                                               Register rn,
                                               Register rd,
                                               const ShifterOperand& so) {
@@ -1304,19 +1315,25 @@
     rn = so.GetRegister();
 
     switch (so.GetShift()) {
-    case LSL: thumb_opcode = 0U /* 0b00 */; break;
-    case LSR: thumb_opcode = 1U /* 0b01 */; break;
-    case ASR: thumb_opcode = 2U /* 0b10 */; break;
-    case ROR:
-      // ROR doesn't allow immediates.
-      thumb_opcode = 7U /* 0b111 */;
-      dp_opcode = 1U /* 0b01 */;
-      opcode_shift = 6;
-      use_immediate = false;
+    case LSL:
+      DCHECK_LE(immediate, 31u);
+      thumb_opcode = 0U /* 0b00 */;
       break;
-    case RRX: break;
+    case LSR:
+      DCHECK(1 <= immediate && immediate <= 32);
+      immediate &= 31;  // 32 is encoded as 0.
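+      // E.g. "lsrs r0, r1, #32" therefore emits a shift amount of 0.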
+      thumb_opcode = 1U /* 0b01 */;
+      break;
+    case ASR:
+      DCHECK(1 <= immediate && immediate <= 32);
+      immediate &= 31;  // 32 is encoded as 0.
+      thumb_opcode = 2U /* 0b10 */;
+      break;
+    case ROR:  // No 16-bit ROR immediate.
+    case RRX:  // No 16-bit RRX.
     default:
-     break;
+      LOG(FATAL) << "Unexpected shift: " << so.GetShift();
+      UNREACHABLE();
     }
   } else {
     if (so.IsImmediate()) {
@@ -1334,6 +1351,9 @@
         case ADC:
         case SBC:
         case BIC: {
+          // Sets condition codes if and only if outside IT block;
+          // check that it complies with set_cc.
+          DCHECK((cond == AL) ? set_cc != kCcKeep : set_cc != kCcSet);
           if (rn == rd) {
             rn = so.GetRegister();
           } else {
@@ -1348,9 +1368,17 @@
           rn = so.GetRegister();
           break;
         }
-        case TST:
-        case TEQ:
         case MVN: {
+          // Sets condition codes if and only if outside IT block;
+          // check that it complies with set_cc.
+          DCHECK((cond == AL) ? set_cc != kCcKeep : set_cc != kCcSet);
+          CHECK_EQ(rn, 0);
+          rn = so.GetRegister();
+          break;
+        }
+        case TST:
+        case TEQ: {
+          DCHECK(set_cc == kCcSet);
           CHECK_EQ(rn, 0);
           rn = so.GetRegister();
           break;
@@ -1371,6 +1399,7 @@
       case TST: thumb_opcode = 8U /* 0b1000 */; CHECK(!use_immediate); break;
       case MVN: thumb_opcode = 15U /* 0b1111 */; CHECK(!use_immediate); break;
       case CMP: {
+        DCHECK(set_cc == kCcSet);
         if (use_immediate) {
           // T2 encoding.
           dp_opcode = 0;
@@ -1378,6 +1407,13 @@
           thumb_opcode = 5U /* 0b101 */;
           rd_shift = 8;
           rn_shift = 8;
+        } else if (IsHighRegister(rd) || IsHighRegister(rn)) {
+          // Special cmp for high registers.
+          dp_opcode = 1U /* 0b01 */;
+          opcode_shift = 7;
+          // Put the top bit of rd into the bottom bit of the opcode.
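+          // E.g. "cmp r8, r1" assembles to 4588 and "cmp r9, r8" to 45c1.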
+          thumb_opcode = 10U /* 0b0001010 */ | static_cast<uint32_t>(rd) >> 3;
+          rd = static_cast<Register>(static_cast<uint32_t>(rd) & 7U /* 0b111 */);
         } else {
           thumb_opcode = 10U /* 0b1010 */;
         }
@@ -1399,7 +1435,7 @@
           rn_shift = 8;
         } else {
           rn = so.GetRegister();
-          if (IsHighRegister(rn) || IsHighRegister(rd)) {
+          if (set_cc != kCcSet) {
             // Special mov for high registers.
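+            // E.g. "mov r8, r9" assembles to the 16-bit 46c8.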
             dp_opcode = 1U /* 0b01 */;
             opcode_shift = 7;
@@ -1407,6 +1443,8 @@
             thumb_opcode = 12U /* 0b0001100 */ | static_cast<uint32_t>(rd) >> 3;
             rd = static_cast<Register>(static_cast<uint32_t>(rd) & 7U /* 0b111 */);
           } else {
+            DCHECK(!IsHighRegister(rn));
+            DCHECK(!IsHighRegister(rd));
             thumb_opcode = 0;
           }
         }
@@ -1436,9 +1474,9 @@
 
 
 // ADD and SUB are complex enough to warrant their own emitter.
-void Thumb2Assembler::Emit16BitAddSub(Condition cond ATTRIBUTE_UNUSED,
+void Thumb2Assembler::Emit16BitAddSub(Condition cond,
                                       Opcode opcode,
-                                      bool set_cc,
+                                      SetCc set_cc,
                                       Register rn,
                                       Register rd,
                                       const ShifterOperand& so) {
@@ -1449,7 +1487,7 @@
   uint8_t immediate_shift = 0;
   bool use_immediate = false;
   uint32_t immediate = 0;  // Should be at most 9 bits but keep the full immediate for CHECKs.
-  uint8_t thumb_opcode;;
+  uint8_t thumb_opcode;
 
   if (so.IsImmediate()) {
     use_immediate = true;
@@ -1460,7 +1498,7 @@
     case ADD:
       if (so.IsRegister()) {
         Register rm = so.GetRegister();
-        if (rn == rd && !set_cc) {
+        if (rn == rd && set_cc != kCcSet) {
           // Can use T2 encoding (allows 4 bit registers)
           dp_opcode = 1U /* 0b01 */;
           opcode_shift = 10;
@@ -1471,6 +1509,12 @@
           rd = static_cast<Register>(static_cast<uint32_t>(rd) & 7U /* 0b111 */);
         } else {
           // T1.
+          DCHECK(!IsHighRegister(rd));
+          DCHECK(!IsHighRegister(rn));
+          DCHECK(!IsHighRegister(rm));
+          // Sets condition codes if and only if outside IT block;
+          // check that it complies with set_cc.
+          DCHECK((cond == AL) ? set_cc != kCcKeep : set_cc != kCcSet);
           opcode_shift = 9;
           thumb_opcode = 12U /* 0b01100 */;
           immediate = static_cast<uint32_t>(so.GetRegister());
@@ -1523,40 +1567,47 @@
 
     case SUB:
       if (so.IsRegister()) {
-         // T1.
-         opcode_shift = 9;
-         thumb_opcode = 13U /* 0b01101 */;
-         immediate = static_cast<uint32_t>(so.GetRegister());
-         use_immediate = true;
-         immediate_shift = 6;
-       } else {
-         if (rd == SP && rn == SP) {
-           // SUB sp, sp, #imm
-           dp_opcode = 2U /* 0b10 */;
-           thumb_opcode = 0x61 /* 0b1100001 */;
-           opcode_shift = 7;
-           CHECK_LT(immediate, (1u << 9));
-           CHECK_ALIGNED(immediate, 4);
+        // T1.
+        Register rm = so.GetRegister();
+        DCHECK(!IsHighRegister(rd));
+        DCHECK(!IsHighRegister(rn));
+        DCHECK(!IsHighRegister(rm));
+        // Sets condition codes if and only if outside IT block;
+        // check that it complies with set_cc.
+        DCHECK((cond == AL) ? set_cc != kCcKeep : set_cc != kCcSet);
+        opcode_shift = 9;
+        thumb_opcode = 13U /* 0b01101 */;
+        immediate = static_cast<uint32_t>(rm);
+        use_immediate = true;
+        immediate_shift = 6;
+      } else {
+        if (rd == SP && rn == SP) {
+          // SUB sp, sp, #imm
+          dp_opcode = 2U /* 0b10 */;
+          thumb_opcode = 0x61 /* 0b1100001 */;
+          opcode_shift = 7;
+          CHECK_LT(immediate, (1u << 9));
+          CHECK_ALIGNED(immediate, 4);
 
-           // Remove rd and rn from instruction by orring it with immed and clearing bits.
-           rn = R0;
-           rd = R0;
-           rd_shift = 0;
-           rn_shift = 0;
-           immediate >>= 2;
-         } else if (rn != rd) {
-           // Must use T1.
-           opcode_shift = 9;
-           thumb_opcode = 15U /* 0b01111 */;
-           immediate_shift = 6;
-         } else {
-           // T2 encoding.
-           opcode_shift = 11;
-           thumb_opcode = 7U /* 0b111 */;
-           rd_shift = 8;
-           rn_shift = 8;
-         }
-       }
+          // Remove rd and rn from the instruction by zeroing them; the offset
+          // is carried entirely by the immediate field.
+          rn = R0;
+          rd = R0;
+          rd_shift = 0;
+          rn_shift = 0;
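+          // The offset is stored in words, e.g. "sub sp, sp, #16" encodes 4.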
+          immediate >>= 2;
+        } else if (rn != rd) {
+          // Must use T1.
+          opcode_shift = 9;
+          thumb_opcode = 15U /* 0b01111 */;
+          immediate_shift = 6;
+        } else {
+          // T2 encoding.
+          opcode_shift = 11;
+          thumb_opcode = 7U /* 0b111 */;
+          rd_shift = 8;
+          rn_shift = 8;
+        }
+      }
       break;
     default:
       LOG(FATAL) << "This opcode is not an ADD or SUB: " << opcode;
@@ -1575,7 +1626,7 @@
 
 void Thumb2Assembler::EmitDataProcessing(Condition cond,
                                          Opcode opcode,
-                                         bool set_cc,
+                                         SetCc set_cc,
                                          Register rn,
                                          Register rd,
                                          const ShifterOperand& so) {
@@ -1589,9 +1640,15 @@
   }
 }
 
-void Thumb2Assembler::EmitShift(Register rd, Register rm, Shift shift, uint8_t amount, bool setcc) {
+void Thumb2Assembler::EmitShift(Register rd,
+                                Register rm,
+                                Shift shift,
+                                uint8_t amount,
+                                Condition cond,
+                                SetCc set_cc) {
   CHECK_LT(amount, (1 << 5));
-  if (IsHighRegister(rd) || IsHighRegister(rm) || shift == ROR || shift == RRX) {
+  if ((IsHighRegister(rd) || IsHighRegister(rm) || shift == ROR || shift == RRX) ||
+      ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet)) {
     uint16_t opcode = 0;
     switch (shift) {
       case LSL: opcode = 0U /* 0b00 */; break;
@@ -1605,7 +1662,7 @@
     }
     // 32 bit.
     int32_t encoding = B31 | B30 | B29 | B27 | B25 | B22 |
-        0xf << 16 | (setcc ? B20 : 0);
+        0xf << 16 | (set_cc == kCcSet ? B20 : 0);
     uint32_t imm3 = amount >> 2;
     uint32_t imm2 = amount & 3U /* 0b11 */;
     encoding |= imm3 << 12 | imm2 << 6 | static_cast<int16_t>(rm) |
@@ -1628,10 +1685,16 @@
   }
 }
 
-void Thumb2Assembler::EmitShift(Register rd, Register rn, Shift shift, Register rm, bool setcc) {
+void Thumb2Assembler::EmitShift(Register rd,
+                                Register rn,
+                                Shift shift,
+                                Register rm,
+                                Condition cond,
+                                SetCc set_cc) {
   CHECK_NE(shift, RRX);
   bool must_be_32bit = false;
-  if (IsHighRegister(rd) || IsHighRegister(rm) || IsHighRegister(rn) || rd != rn) {
+  if (IsHighRegister(rd) || IsHighRegister(rm) || IsHighRegister(rn) || rd != rn ||
+      ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet)) {
     must_be_32bit = true;
   }
 
@@ -1648,7 +1711,7 @@
      }
      // 32 bit.
      int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 |
-         0xf << 12 | (setcc ? B20 : 0);
+         0xf << 12 | (set_cc == kCcSet ? B20 : 0);
      encoding |= static_cast<int16_t>(rn) << 16 | static_cast<int16_t>(rm) |
          static_cast<int16_t>(rd) << 8 | opcode << 21;
      Emit32(encoding);
@@ -1658,6 +1721,7 @@
       case LSL: opcode = 2U /* 0b0010 */; break;
       case LSR: opcode = 3U /* 0b0011 */; break;
       case ASR: opcode = 4U /* 0b0100 */; break;
+      case ROR: opcode = 7U /* 0b0111 */; break;
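+      // E.g. "rors r0, r1" can now be emitted as the 16-bit 41c8.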
       default:
         LOG(FATAL) << "Unsupported thumb2 shift opcode";
         UNREACHABLE();
@@ -2915,70 +2979,70 @@
 
 
 void Thumb2Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CHECK_LE(shift_imm, 31u);
   CheckCondition(cond);
-  EmitShift(rd, rm, LSL, shift_imm, setcc);
+  EmitShift(rd, rm, LSL, shift_imm, cond, set_cc);
 }
 
 
 void Thumb2Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply with UAL syntax.
   CheckCondition(cond);
-  EmitShift(rd, rm, LSR, shift_imm, setcc);
+  EmitShift(rd, rm, LSR, shift_imm, cond, set_cc);
 }
 
 
 void Thumb2Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply with UAL syntax.
   CheckCondition(cond);
-  EmitShift(rd, rm, ASR, shift_imm, setcc);
+  EmitShift(rd, rm, ASR, shift_imm, cond, set_cc);
 }
 
 
 void Thumb2Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CHECK(1u <= shift_imm && shift_imm <= 31u);
   CheckCondition(cond);
-  EmitShift(rd, rm, ROR, shift_imm, setcc);
+  EmitShift(rd, rm, ROR, shift_imm, cond, set_cc);
 }
 
 
-void Thumb2Assembler::Rrx(Register rd, Register rm, bool setcc, Condition cond) {
+void Thumb2Assembler::Rrx(Register rd, Register rm, Condition cond, SetCc set_cc) {
   CheckCondition(cond);
-  EmitShift(rd, rm, RRX, rm, setcc);
+  EmitShift(rd, rm, RRX, rm, cond, set_cc);
 }
 
 
 void Thumb2Assembler::Lsl(Register rd, Register rm, Register rn,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CheckCondition(cond);
-  EmitShift(rd, rm, LSL, rn, setcc);
+  EmitShift(rd, rm, LSL, rn, cond, set_cc);
 }
 
 
 void Thumb2Assembler::Lsr(Register rd, Register rm, Register rn,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CheckCondition(cond);
-  EmitShift(rd, rm, LSR, rn, setcc);
+  EmitShift(rd, rm, LSR, rn, cond, set_cc);
 }
 
 
 void Thumb2Assembler::Asr(Register rd, Register rm, Register rn,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CheckCondition(cond);
-  EmitShift(rd, rm, ASR, rn, setcc);
+  EmitShift(rd, rm, ASR, rn, cond, set_cc);
 }
 
 
 void Thumb2Assembler::Ror(Register rd, Register rm, Register rn,
-                          bool setcc, Condition cond) {
+                          Condition cond, SetCc set_cc) {
   CheckCondition(cond);
-  EmitShift(rd, rm, ROR, rn, setcc);
+  EmitShift(rd, rm, ROR, rn, cond, set_cc);
 }
 
 
@@ -3173,24 +3237,24 @@
                                           Condition cond) {
   ShifterOperand shifter_op;
   if (ShifterOperandCanHold(rd, rn, ADD, value, &shifter_op)) {
-    adds(rd, rn, shifter_op, cond);
+    add(rd, rn, shifter_op, cond, kCcSet);
   } else if (ShifterOperandCanHold(rd, rn, ADD, -value, &shifter_op)) {
-    subs(rd, rn, shifter_op, cond);
+    sub(rd, rn, shifter_op, cond, kCcSet);
   } else {
     CHECK(rn != IP);
     if (ShifterOperandCanHold(rd, rn, MVN, ~value, &shifter_op)) {
       mvn(IP, shifter_op, cond);
-      adds(rd, rn, ShifterOperand(IP), cond);
+      add(rd, rn, ShifterOperand(IP), cond, kCcSet);
     } else if (ShifterOperandCanHold(rd, rn, MVN, ~(-value), &shifter_op)) {
       mvn(IP, shifter_op, cond);
-      subs(rd, rn, ShifterOperand(IP), cond);
+      sub(rd, rn, ShifterOperand(IP), cond, kCcSet);
     } else {
       movw(IP, Low16Bits(value), cond);
       uint16_t value_high = High16Bits(value);
       if (value_high != 0) {
         movt(IP, value_high, cond);
       }
-      adds(rd, rn, ShifterOperand(IP), cond);
+      add(rd, rn, ShifterOperand(IP), cond, kCcSet);
     }
   }
 }
@@ -3316,7 +3380,7 @@
       }
     }
     LoadImmediate(tmp_reg, offset, cond);
-    add(tmp_reg, tmp_reg, ShifterOperand(base), cond);
+    add(tmp_reg, tmp_reg, ShifterOperand(base), AL);
     base = tmp_reg;
     offset = 0;
   }
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 41eb5d3..c802c27 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -63,25 +63,29 @@
   void FinalizeCode() OVERRIDE;
 
   // Data-processing instructions.
-  void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void and_(Register rd, Register rn, const ShifterOperand& so,
+                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void eor(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void eor(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void sub(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void subs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void sub(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void rsb(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void rsbs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void rsb(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void add(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void add(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void adds(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void adc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void adc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void sbc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void sbc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-
-  void rsc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void rsc(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
   void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
 
@@ -91,16 +95,17 @@
 
   void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
 
-  void orr(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void orrs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void orr(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void mov(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void movs(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void mov(Register rd, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void bic(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void bic(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void mvn(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
-  void mvns(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  virtual void mvn(Register rd, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
   // Miscellaneous data-processing instructions.
   void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
@@ -245,25 +250,25 @@
   void blx(Register rm, Condition cond = AL) OVERRIDE;
   void bx(Register rm, Condition cond = AL) OVERRIDE;
 
-  void Lsl(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Lsr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Asr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Ror(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Rrx(Register rd, Register rm, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
+  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Asr(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Ror(Register rd, Register rm, uint32_t shift_imm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Rrx(Register rd, Register rm,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
-  void Lsl(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Lsr(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Asr(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
-  void Ror(Register rd, Register rm, Register rn, bool setcc = false,
-           Condition cond = AL) OVERRIDE;
+  virtual void Lsl(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Lsr(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Asr(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+  virtual void Ror(Register rd, Register rm, Register rn,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
   void Push(Register rd, Condition cond = AL) OVERRIDE;
   void Pop(Register rd, Condition cond = AL) OVERRIDE;
@@ -600,7 +605,7 @@
   // Emit a single 32 or 16 bit data processing instruction.
   void EmitDataProcessing(Condition cond,
                           Opcode opcode,
-                          bool set_cc,
+                          SetCc set_cc,
                           Register rn,
                           Register rd,
                           const ShifterOperand& so);
@@ -609,7 +614,7 @@
   // in 16 bits?
   bool Is32BitDataProcessing(Condition cond,
                              Opcode opcode,
-                             bool set_cc,
+                             SetCc set_cc,
                              Register rn,
                              Register rd,
                              const ShifterOperand& so);
@@ -617,7 +622,7 @@
   // Emit a 32 bit data processing instruction.
   void Emit32BitDataProcessing(Condition cond,
                                Opcode opcode,
-                               bool set_cc,
+                               SetCc set_cc,
                                Register rn,
                                Register rd,
                                const ShifterOperand& so);
@@ -625,14 +630,14 @@
   // Emit a 16 bit data processing instruction.
   void Emit16BitDataProcessing(Condition cond,
                                Opcode opcode,
-                               bool set_cc,
+                               SetCc set_cc,
                                Register rn,
                                Register rd,
                                const ShifterOperand& so);
 
   void Emit16BitAddSub(Condition cond,
                        Opcode opcode,
-                       bool set_cc,
+                       SetCc set_cc,
                        Register rn,
                        Register rd,
                        const ShifterOperand& so);
@@ -694,8 +699,10 @@
   static int DecodeBranchOffset(int32_t inst);
   int32_t EncodeTstOffset(int offset, int32_t inst);
   int DecodeTstOffset(int32_t inst);
-  void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount, bool setcc = false);
-  void EmitShift(Register rd, Register rn, Shift shift, Register rm, bool setcc = false);
+  void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount,
+                 Condition cond = AL, SetCc set_cc = kCcDontCare);
+  void EmitShift(Register rd, Register rn, Shift shift, Register rm,
+                 Condition cond = AL, SetCc set_cc = kCcDontCare);
 
   // Whether the assembler can relocate branches. If false, unresolved branches will be
   // emitted on 32bits.
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index cb01cea..b2a354b 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -199,6 +199,7 @@
 TEST(Thumb2AssemblerTest, SimpleMov) {
   arm::Thumb2Assembler assembler;
 
+  __ movs(R0, ShifterOperand(R1));
   __ mov(R0, ShifterOperand(R1));
   __ mov(R8, ShifterOperand(R9));
 
@@ -222,8 +223,8 @@
   arm::Thumb2Assembler assembler;
 
   __ mov(R0, ShifterOperand(R1));
-  __ add(R0, R1, ShifterOperand(R2));
-  __ add(R0, R1, ShifterOperand());
+  __ adds(R0, R1, ShifterOperand(R2));
+  __ add(R0, R1, ShifterOperand(0));
 
   EmitAndCheck(&assembler, "SimpleMovAdd");
 }
@@ -231,41 +232,132 @@
 TEST(Thumb2AssemblerTest, DataProcessingRegister) {
   arm::Thumb2Assembler assembler;
 
+  // 32 bit variants using low registers.
+  __ mvn(R0, ShifterOperand(R1), AL, kCcKeep);
+  __ add(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ sub(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ and_(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ orr(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ eor(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ bic(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ adc(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ sbc(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ rsb(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ teq(R0, ShifterOperand(R1));
+
+  // 16 bit variants using low registers.
+  __ movs(R0, ShifterOperand(R1));
+  __ mov(R0, ShifterOperand(R1), AL, kCcKeep);
+  __ mvns(R0, ShifterOperand(R1));
+  __ add(R0, R0, ShifterOperand(R1), AL, kCcKeep);
+  __ adds(R0, R1, ShifterOperand(R2));
+  __ subs(R0, R1, ShifterOperand(R2));
+  __ adcs(R0, R0, ShifterOperand(R1));
+  __ sbcs(R0, R0, ShifterOperand(R1));
+  __ ands(R0, R0, ShifterOperand(R1));
+  __ orrs(R0, R0, ShifterOperand(R1));
+  __ eors(R0, R0, ShifterOperand(R1));
+  __ bics(R0, R0, ShifterOperand(R1));
+  __ tst(R0, ShifterOperand(R1));
+  __ cmp(R0, ShifterOperand(R1));
+  __ cmn(R0, ShifterOperand(R1));
+
+  // 16-bit variants using high registers.
+  __ mov(R1, ShifterOperand(R8), AL, kCcKeep);
+  __ mov(R9, ShifterOperand(R0), AL, kCcKeep);
+  __ mov(R8, ShifterOperand(R9), AL, kCcKeep);
+  __ add(R1, R1, ShifterOperand(R8), AL, kCcKeep);
+  __ add(R9, R9, ShifterOperand(R0), AL, kCcKeep);
+  __ add(R8, R8, ShifterOperand(R9), AL, kCcKeep);
+  __ cmp(R0, ShifterOperand(R9));
+  __ cmp(R8, ShifterOperand(R1));
+  __ cmp(R9, ShifterOperand(R8));
+
+  // The 16-bit RSBS Rd, Rn, #0 (also known as NEGS Rd, Rn) is specified with
+  // an immediate (0) but encoded without one, so we test it here.
+  __ rsbs(R0, R1, ShifterOperand(0));
+  __ rsbs(R0, R0, ShifterOperand(0));  // Check Rd == Rn code path.
+
+  // 32 bit variants using high registers that would be 16-bit if using low registers.
+  __ movs(R0, ShifterOperand(R8));
+  __ mvns(R0, ShifterOperand(R8));
+  __ add(R0, R1, ShifterOperand(R8), AL, kCcKeep);
+  __ adds(R0, R1, ShifterOperand(R8));
+  __ subs(R0, R1, ShifterOperand(R8));
+  __ adcs(R0, R0, ShifterOperand(R8));
+  __ sbcs(R0, R0, ShifterOperand(R8));
+  __ ands(R0, R0, ShifterOperand(R8));
+  __ orrs(R0, R0, ShifterOperand(R8));
+  __ eors(R0, R0, ShifterOperand(R8));
+  __ bics(R0, R0, ShifterOperand(R8));
+  __ tst(R0, ShifterOperand(R8));
+  __ cmn(R0, ShifterOperand(R8));
+  __ rsbs(R0, R8, ShifterOperand(0));  // Check that this is not emitted as 16-bit.
+  __ rsbs(R8, R8, ShifterOperand(0));  // Check that this is not emitted as 16-bit (Rd == Rn).
+
+  // 32-bit variants of instructions that would be 16-bit outside IT block.
+  __ it(arm::EQ);
+  __ mvns(R0, ShifterOperand(R1), arm::EQ);
+  __ it(arm::EQ);
+  __ adds(R0, R1, ShifterOperand(R2), arm::EQ);
+  __ it(arm::EQ);
+  __ subs(R0, R1, ShifterOperand(R2), arm::EQ);
+  __ it(arm::EQ);
+  __ adcs(R0, R0, ShifterOperand(R1), arm::EQ);
+  __ it(arm::EQ);
+  __ sbcs(R0, R0, ShifterOperand(R1), arm::EQ);
+  __ it(arm::EQ);
+  __ ands(R0, R0, ShifterOperand(R1), arm::EQ);
+  __ it(arm::EQ);
+  __ orrs(R0, R0, ShifterOperand(R1), arm::EQ);
+  __ it(arm::EQ);
+  __ eors(R0, R0, ShifterOperand(R1), arm::EQ);
+  __ it(arm::EQ);
+  __ bics(R0, R0, ShifterOperand(R1), arm::EQ);
+
+  // 16-bit variants of instructions that would be 32-bit outside IT block.
+  __ it(arm::EQ);
+  __ mvn(R0, ShifterOperand(R1), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ add(R0, R1, ShifterOperand(R2), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ sub(R0, R1, ShifterOperand(R2), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ adc(R0, R0, ShifterOperand(R1), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ sbc(R0, R0, ShifterOperand(R1), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ and_(R0, R0, ShifterOperand(R1), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ orr(R0, R0, ShifterOperand(R1), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ eor(R0, R0, ShifterOperand(R1), arm::EQ, kCcKeep);
+  __ it(arm::EQ);
+  __ bic(R0, R0, ShifterOperand(R1), arm::EQ, kCcKeep);
+
+  // 16 bit variants selected for the default kCcDontCare.
   __ mov(R0, ShifterOperand(R1));
   __ mvn(R0, ShifterOperand(R1));
-
-  // 32 bit variants.
+  __ add(R0, R0, ShifterOperand(R1));
   __ add(R0, R1, ShifterOperand(R2));
   __ sub(R0, R1, ShifterOperand(R2));
-  __ and_(R0, R1, ShifterOperand(R2));
-  __ orr(R0, R1, ShifterOperand(R2));
-  __ eor(R0, R1, ShifterOperand(R2));
-  __ bic(R0, R1, ShifterOperand(R2));
-  __ adc(R0, R1, ShifterOperand(R2));
-  __ sbc(R0, R1, ShifterOperand(R2));
-  __ rsb(R0, R1, ShifterOperand(R2));
-
-  // 16 bit variants.
-  __ add(R0, R1, ShifterOperand());
-  __ sub(R0, R1, ShifterOperand());
+  __ adc(R0, R0, ShifterOperand(R1));
+  __ sbc(R0, R0, ShifterOperand(R1));
   __ and_(R0, R0, ShifterOperand(R1));
   __ orr(R0, R0, ShifterOperand(R1));
   __ eor(R0, R0, ShifterOperand(R1));
   __ bic(R0, R0, ShifterOperand(R1));
-  __ adc(R0, R0, ShifterOperand(R1));
-  __ sbc(R0, R0, ShifterOperand(R1));
-  __ rsb(R0, R0, ShifterOperand(R1));
+  __ mov(R1, ShifterOperand(R8));
+  __ mov(R9, ShifterOperand(R0));
+  __ mov(R8, ShifterOperand(R9));
+  __ add(R1, R1, ShifterOperand(R8));
+  __ add(R9, R9, ShifterOperand(R0));
+  __ add(R8, R8, ShifterOperand(R9));
+  __ rsb(R0, R1, ShifterOperand(0));
+  __ rsb(R0, R0, ShifterOperand(0));
 
-  __ tst(R0, ShifterOperand(R1));
-  __ teq(R0, ShifterOperand(R1));
-  __ cmp(R0, ShifterOperand(R1));
-  __ cmn(R0, ShifterOperand(R1));
-
-  __ movs(R0, ShifterOperand(R1));
-  __ mvns(R0, ShifterOperand(R1));
-
-  // 32 bit variants.
-  __ add(R12, R1, ShifterOperand(R0));
+  // And an arbitrary 32-bit instruction using IP.
+  __ add(R12, R1, ShifterOperand(R0), AL, kCcKeep);
 
   EmitAndCheck(&assembler, "DataProcessingRegister");
 }
@@ -296,6 +388,9 @@
   __ movs(R0, ShifterOperand(0x55));
   __ mvns(R0, ShifterOperand(0x55));
 
+  __ adds(R0, R1, ShifterOperand(5));
+  __ subs(R0, R1, ShifterOperand(5));
+
   EmitAndCheck(&assembler, "DataProcessingImmediate");
 }
 
@@ -340,18 +435,30 @@
 TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) {
   arm::Thumb2Assembler assembler;
 
-  __ mov(R3, ShifterOperand(R4, LSL, 4));
-  __ mov(R3, ShifterOperand(R4, LSR, 5));
-  __ mov(R3, ShifterOperand(R4, ASR, 6));
-  __ mov(R3, ShifterOperand(R4, ROR, 7));
-  __ mov(R3, ShifterOperand(R4, ROR));
+  // 16-bit variants.
+  __ movs(R3, ShifterOperand(R4, LSL, 4));
+  __ movs(R3, ShifterOperand(R4, LSR, 5));
+  __ movs(R3, ShifterOperand(R4, ASR, 6));
 
-  // 32 bit variants.
-  __ mov(R8, ShifterOperand(R4, LSL, 4));
-  __ mov(R8, ShifterOperand(R4, LSR, 5));
-  __ mov(R8, ShifterOperand(R4, ASR, 6));
-  __ mov(R8, ShifterOperand(R4, ROR, 7));
-  __ mov(R8, ShifterOperand(R4, RRX));
+  // 32-bit ROR because ROR immediate doesn't have a 16-bit version like the other shifts.
+  __ movs(R3, ShifterOperand(R4, ROR, 7));
+
+  // 32-bit RRX because RRX has no 16-bit version.
+  __ movs(R3, ShifterOperand(R4, RRX));
+
+  // 32 bit variants (not setting condition codes).
+  __ mov(R3, ShifterOperand(R4, LSL, 4), AL, kCcKeep);
+  __ mov(R3, ShifterOperand(R4, LSR, 5), AL, kCcKeep);
+  __ mov(R3, ShifterOperand(R4, ASR, 6), AL, kCcKeep);
+  __ mov(R3, ShifterOperand(R4, ROR, 7), AL, kCcKeep);
+  __ mov(R3, ShifterOperand(R4, RRX), AL, kCcKeep);
+
+  // 32 bit variants (high registers).
+  __ movs(R8, ShifterOperand(R4, LSL, 4));
+  __ movs(R8, ShifterOperand(R4, LSR, 5));
+  __ movs(R8, ShifterOperand(R4, ASR, 6));
+  __ movs(R8, ShifterOperand(R4, ROR, 7));
+  __ movs(R8, ShifterOperand(R4, RRX));
 
   EmitAndCheck(&assembler, "DataProcessingShiftedRegister");
 }
@@ -1023,7 +1130,7 @@
 TEST(Thumb2AssemblerTest, Shifts) {
   arm::Thumb2Assembler assembler;
 
-  // 16 bit
+  // 16 bit selected for the default kCcDontCare.
   __ Lsl(R0, R1, 5);
   __ Lsr(R0, R1, 5);
   __ Asr(R0, R1, 5);
@@ -1031,6 +1138,32 @@
   __ Lsl(R0, R0, R1);
   __ Lsr(R0, R0, R1);
   __ Asr(R0, R0, R1);
+  __ Ror(R0, R0, R1);
+
+  // 16 bit with kCcSet.
+  __ Lsls(R0, R1, 5);
+  __ Lsrs(R0, R1, 5);
+  __ Asrs(R0, R1, 5);
+
+  __ Lsls(R0, R0, R1);
+  __ Lsrs(R0, R0, R1);
+  __ Asrs(R0, R0, R1);
+  __ Rors(R0, R0, R1);
+
+  // 32-bit with kCcKeep.
+  __ Lsl(R0, R1, 5, AL, kCcKeep);
+  __ Lsr(R0, R1, 5, AL, kCcKeep);
+  __ Asr(R0, R1, 5, AL, kCcKeep);
+
+  __ Lsl(R0, R0, R1, AL, kCcKeep);
+  __ Lsr(R0, R0, R1, AL, kCcKeep);
+  __ Asr(R0, R0, R1, AL, kCcKeep);
+  __ Ror(R0, R0, R1, AL, kCcKeep);
+
+  // 32-bit because ROR immediate doesn't have a 16-bit version like the other shifts.
+  __ Ror(R0, R1, 5);
+  __ Rors(R0, R1, 5);
+  __ Ror(R0, R1, 5, AL, kCcKeep);
 
   // 32 bit due to high registers.
   __ Lsl(R8, R1, 5);
@@ -1052,21 +1185,21 @@
   // S bit (all 32 bit)
 
   // 32 bit due to high registers.
-  __ Lsl(R8, R1, 5, true);
-  __ Lsr(R0, R8, 5, true);
-  __ Asr(R8, R1, 5, true);
-  __ Ror(R0, R8, 5, true);
+  __ Lsls(R8, R1, 5);
+  __ Lsrs(R0, R8, 5);
+  __ Asrs(R8, R1, 5);
+  __ Rors(R0, R8, 5);
 
   // 32 bit due to different Rd and Rn.
-  __ Lsl(R0, R1, R2, true);
-  __ Lsr(R0, R1, R2, true);
-  __ Asr(R0, R1, R2, true);
-  __ Ror(R0, R1, R2, true);
+  __ Lsls(R0, R1, R2);
+  __ Lsrs(R0, R1, R2);
+  __ Asrs(R0, R1, R2);
+  __ Rors(R0, R1, R2);
 
   // 32 bit due to use of high registers.
-  __ Lsl(R8, R1, R2, true);
-  __ Lsr(R0, R8, R2, true);
-  __ Asr(R0, R1, R8, true);
+  __ Lsls(R8, R1, R2);
+  __ Lsrs(R0, R8, R2);
+  __ Asrs(R0, R1, R8);
 
   EmitAndCheck(&assembler, "Shifts");
 }
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 280ed77..82ad642 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -1,8 +1,9 @@
 const char* SimpleMovResults[] = {
   "   0:	0008      	movs	r0, r1\n",
-  "   2:	46c8      	mov	r8, r9\n",
-  "   4:	2001      	movs	r0, #1\n",
-  "   6:	f04f 0809 	mov.w	r8, #9\n",
+  "   2:	4608      	mov	r0, r1\n",
+  "   4:	46c8      	mov	r8, r9\n",
+  "   6:	2001      	movs	r0, #1\n",
+  "   8:	f04f 0809 	mov.w	r8, #9\n",
   nullptr
 };
 const char* SimpleMov32Results[] = {
@@ -11,39 +12,120 @@
   nullptr
 };
 const char* SimpleMovAddResults[] = {
-  "   0:	0008      	movs	r0, r1\n",
+  "   0:	4608      	mov	r0, r1\n",
   "   2:	1888      	adds	r0, r1, r2\n",
   "   4:	1c08      	adds	r0, r1, #0\n",
   nullptr
 };
 const char* DataProcessingRegisterResults[] = {
-  "   0:	0008      	movs	r0, r1\n",
-  "   2:	43c8      	mvns	r0, r1\n",
-  "   4:	1888      	adds	r0, r1, r2\n",
-  "   6:	1a88      	subs	r0, r1, r2\n",
-  "   8:	ea01 0002 	and.w	r0, r1, r2\n",
-  "   c:	ea41 0002 	orr.w	r0, r1, r2\n",
-  "  10:	ea81 0002 	eor.w	r0, r1, r2\n",
-  "  14:	ea21 0002 	bic.w	r0, r1, r2\n",
-  "  18:	eb41 0002 	adc.w	r0, r1, r2\n",
-  "  1c:	eb61 0002 	sbc.w	r0, r1, r2\n",
-  "  20:	ebc1 0002 	rsb	r0, r1, r2\n",
-  "  24:	1c08      	adds	r0, r1, #0\n",
-  "  26:	1e08      	subs	r0, r1, #0\n",
-  "  28:	4008      	ands	r0, r1\n",
-  "  2a:	4308      	orrs	r0, r1\n",
-  "  2c:	4048      	eors	r0, r1\n",
-  "  2e:	4388      	bics	r0, r1\n",
-  "  30:	4148      	adcs	r0, r1\n",
-  "  32:	4188      	sbcs	r0, r1\n",
-  "  34:	4248      	negs	r0, r1\n",
-  "  36:	4208      	tst	r0, r1\n",
-  "  38:	ea90 0f01 	teq	r0, r1\n",
-  "  3c:	4288      	cmp	r0, r1\n",
-  "  3e:	42c8      	cmn	r0, r1\n",
-  "  40:	0008      	movs	r0, r1\n",
-  "  42:	43c8      	mvns	r0, r1\n",
-  "  44:	eb01 0c00   add.w	ip, r1, r0\n",
+  "   0:	ea6f 0001 	mvn.w	r0, r1\n",
+  "   4:	eb01 0002 	add.w	r0, r1, r2\n",
+  "   8:	eba1 0002 	sub.w	r0, r1, r2\n",
+  "   c:	ea01 0002 	and.w	r0, r1, r2\n",
+  "  10:	ea41 0002 	orr.w	r0, r1, r2\n",
+  "  14:	ea81 0002 	eor.w	r0, r1, r2\n",
+  "  18:	ea21 0002 	bic.w	r0, r1, r2\n",
+  "  1c:	eb41 0002 	adc.w	r0, r1, r2\n",
+  "  20:	eb61 0002 	sbc.w	r0, r1, r2\n",
+  "  24:	ebc1 0002 	rsb	r0, r1, r2\n",
+  "  28:	ea90 0f01 	teq	r0, r1\n",
+  "  2c:	0008      	movs	r0, r1\n",
+  "  2e:	4608      	mov	r0, r1\n",
+  "  30:	43c8      	mvns	r0, r1\n",
+  "  32:	4408      	add	r0, r1\n",
+  "  34:	1888      	adds	r0, r1, r2\n",
+  "  36:	1a88      	subs	r0, r1, r2\n",
+  "  38:	4148      	adcs	r0, r1\n",
+  "  3a:	4188      	sbcs	r0, r1\n",
+  "  3c:	4008      	ands	r0, r1\n",
+  "  3e:	4308      	orrs	r0, r1\n",
+  "  40:	4048      	eors	r0, r1\n",
+  "  42:	4388      	bics	r0, r1\n",
+  "  44:	4208      	tst	r0, r1\n",
+  "  46:	4288      	cmp	r0, r1\n",
+  "  48:	42c8      	cmn	r0, r1\n",
+  "  4a:	4641		mov	r1, r8\n",
+  "  4c:	4681		mov	r9, r0\n",
+  "  4e:	46c8		mov	r8, r9\n",
+  "  50:	4441		add	r1, r8\n",
+  "  52:	4481		add	r9, r0\n",
+  "  54:	44c8		add	r8, r9\n",
+  "  56:	4548		cmp	r0, r9\n",
+  "  58:	4588		cmp	r8, r1\n",
+  "  5a:	45c1		cmp	r9, r8\n",
+  "  5c:	4248   	   	negs	r0, r1\n",
+  "  5e:	4240   	   	negs	r0, r0\n",
+  "  60:	ea5f 0008  	movs.w	r0, r8\n",
+  "  64:	ea7f 0008  	mvns.w	r0, r8\n",
+  "  68:	eb01 0008 	add.w	r0, r1, r8\n",
+  "  6c:	eb11 0008 	adds.w	r0, r1, r8\n",
+  "  70:	ebb1 0008 	subs.w	r0, r1, r8\n",
+  "  74:	eb50 0008 	adcs.w	r0, r0, r8\n",
+  "  78:	eb70 0008 	sbcs.w	r0, r0, r8\n",
+  "  7c:	ea10 0008 	ands.w	r0, r0, r8\n",
+  "  80:	ea50 0008 	orrs.w	r0, r0, r8\n",
+  "  84:	ea90 0008 	eors.w	r0, r0, r8\n",
+  "  88:	ea30 0008 	bics.w	r0, r0, r8\n",
+  "  8c:	ea10 0f08 	tst.w	r0, r8\n",
+  "  90:	eb10 0f08 	cmn.w	r0, r8\n",
+  "  94:	f1d8 0000 	rsbs	r0, r8, #0\n",
+  "  98:	f1d8 0800 	rsbs	r8, r8, #0\n",
+  "  9c:	bf08       	it	eq\n",
+  "  9e:	ea7f 0001  	mvnseq.w	r0, r1\n",
+  "  a2:	bf08       	it	eq\n",
+  "  a4:	eb11 0002 	addseq.w	r0, r1, r2\n",
+  "  a8:	bf08       	it	eq\n",
+  "  aa:	ebb1 0002 	subseq.w	r0, r1, r2\n",
+  "  ae:	bf08       	it	eq\n",
+  "  b0:	eb50 0001 	adcseq.w	r0, r0, r1\n",
+  "  b4:	bf08       	it	eq\n",
+  "  b6:	eb70 0001 	sbcseq.w	r0, r0, r1\n",
+  "  ba:	bf08       	it	eq\n",
+  "  bc:	ea10 0001 	andseq.w	r0, r0, r1\n",
+  "  c0:	bf08       	it	eq\n",
+  "  c2:	ea50 0001 	orrseq.w	r0, r0, r1\n",
+  "  c6:	bf08       	it	eq\n",
+  "  c8:	ea90 0001 	eorseq.w	r0, r0, r1\n",
+  "  cc:	bf08       	it	eq\n",
+  "  ce:	ea30 0001 	bicseq.w	r0, r0, r1\n",
+  "  d2:	bf08       	it	eq\n",
+  "  d4:	43c8      	mvneq	r0, r1\n",
+  "  d6:	bf08       	it	eq\n",
+  "  d8:	1888      	addeq	r0, r1, r2\n",
+  "  da:	bf08       	it	eq\n",
+  "  dc:	1a88      	subeq	r0, r1, r2\n",
+  "  de:	bf08       	it	eq\n",
+  "  e0:	4148      	adceq	r0, r1\n",
+  "  e2:	bf08       	it	eq\n",
+  "  e4:	4188      	sbceq	r0, r1\n",
+  "  e6:	bf08       	it	eq\n",
+  "  e8:	4008      	andeq	r0, r1\n",
+  "  ea:	bf08       	it	eq\n",
+  "  ec:	4308      	orreq	r0, r1\n",
+  "  ee:	bf08       	it	eq\n",
+  "  f0:	4048      	eoreq	r0, r1\n",
+  "  f2:	bf08       	it	eq\n",
+  "  f4:	4388      	biceq	r0, r1\n",
+  "  f6:	4608      	mov	r0, r1\n",
+  "  f8:	43c8      	mvns	r0, r1\n",
+  "  fa:	4408      	add	r0, r1\n",
+  "  fc:	1888      	adds	r0, r1, r2\n",
+  "  fe:	1a88      	subs	r0, r1, r2\n",
+  " 100:	4148      	adcs	r0, r1\n",
+  " 102:	4188      	sbcs	r0, r1\n",
+  " 104:	4008      	ands	r0, r1\n",
+  " 106:	4308      	orrs	r0, r1\n",
+  " 108:	4048      	eors	r0, r1\n",
+  " 10a:	4388      	bics	r0, r1\n",
+  " 10c:	4641		mov	r1, r8\n",
+  " 10e:	4681		mov	r9, r0\n",
+  " 110:	46c8		mov	r8, r9\n",
+  " 112:	4441		add	r1, r8\n",
+  " 114:	4481		add	r9, r0\n",
+  " 116:	44c8		add	r8, r9\n",
+  " 118:	4248   	   	negs	r0, r1\n",
+  " 11a:	4240   	   	negs	r0, r0\n",
+  " 11c:	eb01 0c00 	add.w	ip, r1, r0\n",
   nullptr
 };
 const char* DataProcessingImmediateResults[] = {
@@ -66,6 +148,8 @@
   "  3a:	1f48      	subs	r0, r1, #5\n",
   "  3c:	2055      	movs	r0, #85	; 0x55\n",
   "  3e:	f07f 0055 	mvns.w	r0, #85	; 0x55\n",
+  "  42:	1d48      	adds  r0, r1, #5\n",
+  "  44:	1f48      	subs  r0, r1, #5\n",
   nullptr
 };
 const char* DataProcessingModifiedImmediateResults[] = {
@@ -100,13 +184,18 @@
   "   0:	0123      	lsls	r3, r4, #4\n",
   "   2:	0963      	lsrs	r3, r4, #5\n",
   "   4:	11a3      	asrs	r3, r4, #6\n",
-  "   6:	ea4f 13f4 	mov.w	r3, r4, ror #7\n",
-  "   a:	41e3      	rors	r3, r4\n",
-  "   c:	ea4f 1804 	mov.w	r8, r4, lsl #4\n",
-  "  10:	ea4f 1854 	mov.w	r8, r4, lsr #5\n",
-  "  14:	ea4f 18a4 	mov.w	r8, r4, asr #6\n",
-  "  18:	ea4f 18f4 	mov.w	r8, r4, ror #7\n",
-  "  1c:	ea4f 0834 	mov.w	r8, r4, rrx\n",
+  "   6:	ea5f 13f4 	movs.w	r3, r4, ror #7\n",
+  "   a:	ea5f 0334 	movs.w	r3, r4, rrx\n",
+  "   e:	ea4f 1304 	mov.w	r3, r4, lsl #4\n",
+  "  12:	ea4f 1354 	mov.w	r3, r4, lsr #5\n",
+  "  16:	ea4f 13a4 	mov.w	r3, r4, asr #6\n",
+  "  1a:	ea4f 13f4 	mov.w	r3, r4, ror #7\n",
+  "  1e:	ea4f 0334 	mov.w	r3, r4, rrx\n",
+  "  22:	ea5f 1804 	movs.w	r8, r4, lsl #4\n",
+  "  26:	ea5f 1854 	movs.w	r8, r4, lsr #5\n",
+  "  2a:	ea5f 18a4 	movs.w	r8, r4, asr #6\n",
+  "  2e:	ea5f 18f4 	movs.w	r8, r4, ror #7\n",
+  "  32:	ea5f 0834 	movs.w	r8, r4, rrx\n",
   nullptr
 };
 const char* BasicLoadResults[] = {
@@ -1511,7 +1600,7 @@
   " 7fc:	23fa      	movs	r3, #250	; 0xfa\n",
   " 7fe:	23fc      	movs	r3, #252	; 0xfc\n",
   " 800:	23fe      	movs	r3, #254	; 0xfe\n",
-  " 802:	0011      	movs	r1, r2\n",
+  " 802:	4611      	mov	r1, r2\n",
   nullptr
 };
 const char* Branch32Results[] = {
@@ -2541,7 +2630,7 @@
   " 800:	23fc      	movs	r3, #252	; 0xfc\n",
   " 802:	23fe      	movs	r3, #254	; 0xfe\n",
   " 804:	2300      	movs	r3, #0\n",
-  " 806:	0011      	movs	r1, r2\n",
+  " 806:	4611      	mov	r1, r2\n",
   nullptr
 };
 const char* CompareAndBranchMaxResults[] = {
@@ -2610,7 +2699,7 @@
   "  7c:	237a      	movs	r3, #122	; 0x7a\n",
   "  7e:	237c      	movs	r3, #124	; 0x7c\n",
   "  80:	237e      	movs	r3, #126	; 0x7e\n",
-  "  82:	0011      	movs	r1, r2\n",
+  "  82:	4611      	mov	r1, r2\n",
   nullptr
 };
 const char* CompareAndBranchRelocation16Results[] = {
@@ -2681,7 +2770,7 @@
   "  80:	237c      	movs	r3, #124	; 0x7c\n",
   "  82:	237e      	movs	r3, #126	; 0x7e\n",
   "  84:	2380      	movs	r3, #128	; 0x80\n",
-  "  86:	0011      	movs	r1, r2\n",
+  "  86:	4611      	mov	r1, r2\n",
   nullptr
 };
 const char* CompareAndBranchRelocation32Results[] = {
@@ -3712,7 +3801,7 @@
   " 802:	23fc      	movs	r3, #252	; 0xfc\n",
   " 804:	23fe      	movs	r3, #254	; 0xfe\n",
   " 806:	2300      	movs	r3, #0\n",
-  " 808:	0011      	movs	r1, r2\n",
+  " 808:	4611      	mov	r1, r2\n",
   nullptr
 };
 const char* MixedBranch32Results[] = {
@@ -4743,7 +4832,7 @@
   " 802:	23fe      	movs	r3, #254	; 0xfe\n",
   " 804:	2300      	movs	r3, #0\n",
   " 806:	f7ff bbfd 	b.w	4 <MixedBranch32+0x4>\n",
-  " 80a:	0011      	movs	r1, r2\n",
+  " 80a:	4611      	mov	r1, r2\n",
   nullptr
 };
 const char* ShiftsResults[] = {
@@ -4753,28 +4842,46 @@
   "   6:	4088      	lsls	r0, r1\n",
   "   8:	40c8      	lsrs	r0, r1\n",
   "   a:	4108      	asrs	r0, r1\n",
-  "   c:	ea4f 1841 	mov.w	r8, r1, lsl #5\n",
-  "  10:	ea4f 1058 	mov.w	r0, r8, lsr #5\n",
-  "  14:	ea4f 1861 	mov.w	r8, r1, asr #5\n",
-  "  18:	ea4f 1078 	mov.w	r0, r8, ror #5\n",
-  "  1c:	fa01 f002 	lsl.w	r0, r1, r2\n",
-  "  20:	fa21 f002 	lsr.w	r0, r1, r2\n",
-  "  24:	fa41 f002 	asr.w	r0, r1, r2\n",
-  "  28:	fa61 f002 	ror.w	r0, r1, r2\n",
-  "  2c:	fa01 f802 	lsl.w	r8, r1, r2\n",
-  "  30:	fa28 f002 	lsr.w	r0, r8, r2\n",
-  "  34:	fa41 f008 	asr.w	r0, r1, r8\n",
-  "  38:	ea5f 1841 	movs.w	r8, r1, lsl #5\n",
-  "  3c:	ea5f 1058 	movs.w	r0, r8, lsr #5\n",
-  "  40:	ea5f 1861 	movs.w	r8, r1, asr #5\n",
-  "  44:	ea5f 1078 	movs.w	r0, r8, ror #5\n",
-  "  48:	fa11 f002 	lsls.w	r0, r1, r2\n",
-  "  4c:	fa31 f002 	lsrs.w	r0, r1, r2\n",
-  "  50:	fa51 f002 	asrs.w	r0, r1, r2\n",
-  "  54:	fa71 f002 	rors.w	r0, r1, r2\n",
-  "  58:	fa11 f802 	lsls.w	r8, r1, r2\n",
-  "  5c:	fa38 f002 	lsrs.w	r0, r8, r2\n",
-  "  60:	fa51 f008 	asrs.w	r0, r1, r8\n",
+  "   c:	41c8      	rors	r0, r1\n",
+  "   e:	0148      	lsls	r0, r1, #5\n",
+  "  10:	0948      	lsrs	r0, r1, #5\n",
+  "  12:	1148      	asrs	r0, r1, #5\n",
+  "  14:	4088      	lsls	r0, r1\n",
+  "  16:	40c8      	lsrs	r0, r1\n",
+  "  18:	4108      	asrs	r0, r1\n",
+  "  1a:	41c8      	rors	r0, r1\n",
+  "  1c:	ea4f 1041 	mov.w	r0, r1, lsl #5\n",
+  "  20:	ea4f 1051 	mov.w	r0, r1, lsr #5\n",
+  "  24:	ea4f 1061 	mov.w	r0, r1, asr #5\n",
+  "  28:	fa00 f001 	lsl.w	r0, r0, r1\n",
+  "  2c:	fa20 f001 	lsr.w	r0, r0, r1\n",
+  "  30:	fa40 f001 	asr.w	r0, r0, r1\n",
+  "  34:	fa60 f001 	ror.w	r0, r0, r1\n",
+  "  38:	ea4f 1071 	mov.w	r0, r1, ror #5\n",
+  "  3c:	ea5f 1071 	movs.w	r0, r1, ror #5\n",
+  "  40:	ea4f 1071 	mov.w	r0, r1, ror #5\n",
+  "  44:	ea4f 1841 	mov.w	r8, r1, lsl #5\n",
+  "  48:	ea4f 1058 	mov.w	r0, r8, lsr #5\n",
+  "  4c:	ea4f 1861 	mov.w	r8, r1, asr #5\n",
+  "  50:	ea4f 1078 	mov.w	r0, r8, ror #5\n",
+  "  54:	fa01 f002 	lsl.w	r0, r1, r2\n",
+  "  58:	fa21 f002 	lsr.w	r0, r1, r2\n",
+  "  5c:	fa41 f002 	asr.w	r0, r1, r2\n",
+  "  60:	fa61 f002 	ror.w	r0, r1, r2\n",
+  "  64:	fa01 f802 	lsl.w	r8, r1, r2\n",
+  "  68:	fa28 f002 	lsr.w	r0, r8, r2\n",
+  "  6c:	fa41 f008 	asr.w	r0, r1, r8\n",
+  "  70:	ea5f 1841 	movs.w	r8, r1, lsl #5\n",
+  "  74:	ea5f 1058 	movs.w	r0, r8, lsr #5\n",
+  "  78:	ea5f 1861 	movs.w	r8, r1, asr #5\n",
+  "  7c:	ea5f 1078 	movs.w	r0, r8, ror #5\n",
+  "  80:	fa11 f002 	lsls.w	r0, r1, r2\n",
+  "  84:	fa31 f002 	lsrs.w	r0, r1, r2\n",
+  "  88:	fa51 f002 	asrs.w	r0, r1, r2\n",
+  "  8c:	fa71 f002 	rors.w	r0, r1, r2\n",
+  "  90:	fa11 f802 	lsls.w	r8, r1, r2\n",
+  "  94:	fa38 f002 	lsrs.w	r0, r8, r2\n",
+  "  98:	fa51 f008 	asrs.w	r0, r1, r8\n",
   nullptr
 };
 const char* LoadStoreRegOffsetResults[] = {
diff --git a/dexlist/Android.mk b/dexlist/Android.mk
index 9fbd847..6ec6c97 100755
--- a/dexlist/Android.mk
+++ b/dexlist/Android.mk
@@ -14,8 +14,6 @@
 
 # TODO(ajcbik): Art-i-fy this makefile
 
-# TODO(ajcbik): rename dexlist2 into dexlist when Dalvik version is removed
-
 LOCAL_PATH:= $(call my-dir)
 
 dexlist_src_files := dexlist.cc
@@ -33,7 +31,7 @@
 LOCAL_C_INCLUDES := $(dexlist_c_includes)
 LOCAL_CFLAGS += -Wall
 LOCAL_SHARED_LIBRARIES += $(dexlist_libraries)
-LOCAL_MODULE := dexlist2
+LOCAL_MODULE := dexlist
 LOCAL_MODULE_TAGS := optional
 LOCAL_MODULE_PATH := $(TARGET_OUT_OPTIONAL_EXECUTABLES)
 include $(BUILD_EXECUTABLE)
@@ -49,6 +47,6 @@
 LOCAL_C_INCLUDES := $(dexlist_c_includes)
 LOCAL_CFLAGS += -Wall
 LOCAL_SHARED_LIBRARIES += $(dexlist_libraries)
-LOCAL_MODULE := dexlist2
+LOCAL_MODULE := dexlist
 LOCAL_MULTILIB := $(ART_MULTILIB_OVERRIDE_host)
 include $(BUILD_HOST_EXECUTABLE)
diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc
index d8fd242..1d0f75e 100644
--- a/dexlist/dexlist.cc
+++ b/dexlist/dexlist.cc
@@ -235,7 +235,7 @@
         gOptions.outputFileName = optarg;
         break;
       case 'm':
-        // If -m x.y.z is given, then find all instances of the
+        // If -m p.c.m is given, then find all instances of the
         // fully-qualified method name. This isn't really what
         // dexlist is for, but it's easy to do it here.
         {
diff --git a/dexlist/dexlist_test.cc b/dexlist/dexlist_test.cc
index 7b1b63d..82179de 100644
--- a/dexlist/dexlist_test.cc
+++ b/dexlist/dexlist_test.cc
@@ -42,12 +42,11 @@
 
   // Runs test with given arguments.
   bool Exec(const std::vector<std::string>& args, std::string* error_msg) {
-    // TODO(ajcbik): dexlist2 -> dexlist
     std::string file_path = GetTestAndroidRoot();
     if (IsHost()) {
-      file_path += "/bin/dexlist2";
+      file_path += "/bin/dexlist";
     } else {
-      file_path += "/xbin/dexlist2";
+      file_path += "/xbin/dexlist";
     }
     EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path";
     std::vector<std::string> exec_argv = { file_path };
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 1950d56..c553a18 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -78,6 +78,21 @@
   "kClassRoots",
 };
 
+// Cache so that we don't allocate multiple dex files for the same OatDexFile.
+static std::map<const OatFile::OatDexFile*,
+                std::unique_ptr<const DexFile>> opened_dex_files;
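+// The map owns the opened dex files, so pointers returned by OpenDexFile()
+// below remain valid (and must not be deleted) for the life of the process.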
+
+const DexFile* OpenDexFile(const OatFile::OatDexFile* oat_dex_file, std::string* error_msg) {
+  DCHECK(oat_dex_file != nullptr);
+  auto it = opened_dex_files.find(oat_dex_file);
+  if (it != opened_dex_files.end()) {
+    return it->second.get();
+  }
+  const DexFile* ret = oat_dex_file->OpenDexFile(error_msg).release();
+  opened_dex_files.emplace(oat_dex_file, std::unique_ptr<const DexFile>(ret));
+  return ret;
+}
+
 class OatSymbolizer FINAL {
  public:
   class RodataWriter FINAL : public CodeOutput {
@@ -159,8 +174,8 @@
 
   void WalkOatDexFile(const OatFile::OatDexFile* oat_dex_file, Callback callback) {
     std::string error_msg;
-    std::unique_ptr<const DexFile> dex_file(oat_dex_file->OpenDexFile(&error_msg));
-    if (dex_file.get() == nullptr) {
+    const DexFile* const dex_file = OpenDexFile(oat_dex_file, &error_msg);
+    if (dex_file == nullptr) {
       return;
     }
     for (size_t class_def_index = 0;
@@ -172,7 +187,7 @@
       switch (type) {
         case kOatClassAllCompiled:
         case kOatClassSomeCompiled:
-          WalkOatClass(oat_class, *dex_file.get(), class_def, callback);
+          WalkOatClass(oat_class, *dex_file, class_def, callback);
           break;
 
         case kOatClassNoneCompiled:
@@ -504,8 +519,8 @@
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
       CHECK(oat_dex_file != nullptr);
       std::string error_msg;
-      std::unique_ptr<const DexFile> dex_file(oat_dex_file->OpenDexFile(&error_msg));
-      if (dex_file.get() == nullptr) {
+      const DexFile* const dex_file = OpenDexFile(oat_dex_file, &error_msg);
+      if (dex_file == nullptr) {
         LOG(WARNING) << "Failed to open dex file '" << oat_dex_file->GetDexFileLocation()
             << "': " << error_msg;
       } else {
@@ -533,8 +548,8 @@
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
       CHECK(oat_dex_file != nullptr);
       std::string error_msg;
-      std::unique_ptr<const DexFile> dex_file(oat_dex_file->OpenDexFile(&error_msg));
-      if (dex_file.get() == nullptr) {
+      const DexFile* const dex_file = OpenDexFile(oat_dex_file, &error_msg);
+      if (dex_file == nullptr) {
         LOG(WARNING) << "Failed to open dex file '" << oat_dex_file->GetDexFileLocation()
             << "': " << error_msg;
         continue;
@@ -593,8 +608,8 @@
     // Create the verifier early.
 
     std::string error_msg;
-    std::unique_ptr<const DexFile> dex_file(oat_dex_file.OpenDexFile(&error_msg));
-    if (dex_file.get() == nullptr) {
+    const DexFile* const dex_file = OpenDexFile(&oat_dex_file, &error_msg);
+    if (dex_file == nullptr) {
       os << "NOT FOUND: " << error_msg << "\n\n";
       os << std::flush;
       return false;
@@ -621,7 +636,7 @@
          << " (" << oat_class.GetType() << ")\n";
       // TODO: include bitmap here if type is kOatClassSomeCompiled?
       if (options_.list_classes_) continue;
-      if (!DumpOatClass(&vios, oat_class, *(dex_file.get()), class_def, &stop_analysis)) {
+      if (!DumpOatClass(&vios, oat_class, *dex_file, class_def, &stop_analysis)) {
         success = false;
       }
       if (stop_analysis) {
@@ -638,7 +653,7 @@
     std::string error_msg;
     std::string dex_file_location = oat_dex_file.GetDexFileLocation();
 
-    std::unique_ptr<const DexFile> dex_file(oat_dex_file.OpenDexFile(&error_msg));
+    const DexFile* const dex_file = OpenDexFile(&oat_dex_file, &error_msg);
     if (dex_file == nullptr) {
       os << "Failed to open dex file '" << dex_file_location << "': " << error_msg;
       return false;
@@ -2337,21 +2352,17 @@
   ScopedObjectAccess soa(self);
   ClassLinker* class_linker = runtime->GetClassLinker();
   class_linker->RegisterOatFile(oat_file);
-  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  std::vector<const DexFile*> class_path;
   for (const OatFile::OatDexFile* odf : oat_file->GetOatDexFiles()) {
     std::string error_msg;
-    std::unique_ptr<const DexFile> dex_file = odf->OpenDexFile(&error_msg);
+    const DexFile* const dex_file = OpenDexFile(odf, &error_msg);
     CHECK(dex_file != nullptr) << error_msg;
     class_linker->RegisterDexFile(*dex_file);
-    dex_files.push_back(std::move(dex_file));
+    class_path.push_back(dex_file);
   }
 
   // Need a class loader.
   // Fake that we're a compiler.
-  std::vector<const DexFile*> class_path;
-  for (auto& dex_file : dex_files) {
-    class_path.push_back(dex_file.get());
-  }
   jobject class_loader = class_linker->CreatePathClassLoader(self, class_path);
 
   // Use the class loader while dumping.
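
The OpenDexFile helper added above memoizes opened dex files in a process-wide map keyed by
OatDexFile pointer, so the symbolizer and the dumper reuse one allocation per dex file instead of
re-opening it on every walk. A minimal standalone sketch of the same memoization pattern, using
illustrative stand-in types rather than the real ART classes:

    #include <map>
    #include <memory>

    struct Key {};    // stands in for OatFile::OatDexFile
    struct Value {};  // stands in for DexFile

    static std::map<const Key*, std::unique_ptr<const Value>> opened;

    // Returns a borrowed pointer; the static map keeps ownership for the process lifetime,
    // so callers must not delete it (matching the raw DexFile* returned by OpenDexFile).
    const Value* Open(const Key* key) {
      auto it = opened.find(key);
      if (it != opened.end()) {
        return it->second.get();  // cache hit: reuse the earlier allocation
      }
      const Value* raw = new Value();
      opened.emplace(key, std::unique_ptr<const Value>(raw));
      return raw;
    }

    int main() {
      Key k;
      return Open(&k) == Open(&k) ? 0 : 1;  // same pointer both times
    }
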
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index be9af98..1599025 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -167,7 +167,8 @@
   qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
 
-  qpoints->pDeoptimize = art_quick_deoptimize;
+  // Deoptimization from compiled code.
+  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index d6396c1..e45d828 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1141,6 +1141,17 @@
 END art_quick_deoptimize
 
     /*
+     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+    .extern artDeoptimizeFromCompiledCode
+ENTRY art_quick_deoptimize_from_compiled_code
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1
+    mov    r0, r9                         @ Set up args.
+    blx    artDeoptimizeFromCompiledCode  @ artDeoptimizeFromCompiledCode(Thread*)
+END art_quick_deoptimize_from_compiled_code
+
+    /*
      * Signed 64-bit integer multiply.
      *
      * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 0f06727..e9c816f 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -150,8 +150,8 @@
   qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
 
-  // Deoptimize
-  qpoints->pDeoptimize = art_quick_deoptimize;
+  // Deoptimization from compiled code.
+  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index bfef0fa..169bc38 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1739,6 +1739,18 @@
     brk 0
 END art_quick_deoptimize
 
+    /*
+     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+    .extern artDeoptimizeFromCompiledCode
+ENTRY art_quick_deoptimize_from_compiled_code
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    mov    x0, xSELF                      // Pass thread.
+    bl     artDeoptimizeFromCompiledCode  // artDeoptimizeFromCompiledCode(Thread*)
+    brk 0
+END art_quick_deoptimize_from_compiled_code
+
 
     /*
      * String's indexOf.
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 4e4b91f..6721e54 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -267,8 +267,8 @@
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
   static_assert(!IsDirectEntrypoint(kQuickThrowStackOverflow), "Non-direct C stub marked direct.");
 
-  // Deoptimize
-  qpoints->pDeoptimize = art_quick_deoptimize;
+  // Deoptimization from compiled code.
+  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
   static_assert(!IsDirectEntrypoint(kQuickDeoptimize), "Non-direct C stub marked direct.");
 
   // Atomic 64-bit load/store
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index cb49cf5..ba58c3f 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1542,6 +1542,18 @@
 END art_quick_deoptimize
 
     /*
+     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+    .extern artDeoptimizeFromCompiledCode
+ENTRY art_quick_deoptimize_from_compiled_code
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    jal      artDeoptimizeFromCompiledCode  # artDeoptimizeFromCompiledCode(Thread*)
+                                            # (does not return)
+    move     $a0, rSELF                     # pass Thread::current (branch delay slot)
+END art_quick_deoptimize_from_compiled_code
+
+    /*
      * Long integer shift.  This is different from the generic 32/64-bit
      * binary operations because vAA/vBB are 64-bit but vCC (the shift
      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index ec02d5a..9f1f0e0 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -176,8 +176,8 @@
   qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
 
-  // Deoptimize
-  qpoints->pDeoptimize = art_quick_deoptimize;
+  // Deoptimization from compiled code.
+  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
 
   // TODO - use lld/scd instructions for Mips64
   // Atomic 64-bit load/store
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 4bc049c..1b50b2e 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1603,5 +1603,17 @@
     move     $a0, rSELF        # pass Thread::current
 END art_quick_deoptimize
 
+    /*
+     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+    .extern artDeoptimizeFromCompiledCode
+ENTRY art_quick_deoptimize_from_compiled_code
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    jal      artDeoptimizeFromCompiledCode    # artDeoptimizeFromCompiledCode(Thread*)
+                                              # (does not return)
+    move     $a0, rSELF                       # pass Thread::current (branch delay slot)
+END art_quick_deoptimize_from_compiled_code
+
 UNIMPLEMENTED art_quick_indexof
 UNIMPLEMENTED art_quick_string_compareto
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index e2632c1..10fc281 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -140,7 +140,7 @@
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
 
   // Deoptimize
-  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_slow_path;
+  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 9b2d59d..029a296 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1684,9 +1684,6 @@
      */
 DEFINE_FUNCTION art_quick_deoptimize
     PUSH ebx                      // Entry point for a jump. Fake that we were called.
-.globl SYMBOL(art_quick_deoptimize_from_compiled_slow_path)  // Entry point for real calls
-                                                             // from compiled slow paths.
-SYMBOL(art_quick_deoptimize_from_compiled_slow_path):
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
     subl LITERAL(12), %esp        // Align stack.
     CFI_ADJUST_CFA_OFFSET(12)
@@ -1697,6 +1694,20 @@
 END_FUNCTION art_quick_deoptimize
 
     /*
+     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
+    subl LITERAL(12), %esp                      // Align stack.
+    CFI_ADJUST_CFA_OFFSET(12)
+    pushl %fs:THREAD_SELF_OFFSET                // Pass Thread::Current().
+    CFI_ADJUST_CFA_OFFSET(4)
+    call SYMBOL(artDeoptimizeFromCompiledCode)  // artDeoptimizeFromCompiledCode(Thread*)
+    UNREACHABLE
+END_FUNCTION art_quick_deoptimize_from_compiled_code
+
+    /*
      * String's compareTo.
      *
      * On entry:
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index ef1bb5f..5cc72e3 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -144,7 +144,7 @@
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
 
   // Deoptimize
-  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_slow_path;
+  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 88270d9..1498a4b 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1728,9 +1728,6 @@
 DEFINE_FUNCTION art_quick_deoptimize
     pushq %rsi                     // Entry point for a jump. Fake that we were called.
                                    // Use hidden arg.
-.globl SYMBOL(art_quick_deoptimize_from_compiled_slow_path)  // Entry point for real calls
-                                                             // from compiled slow paths.
-SYMBOL(art_quick_deoptimize_from_compiled_slow_path):
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
                                    // Stack should be aligned now.
     movq %gs:THREAD_SELF_OFFSET, %rdi         // Pass Thread.
@@ -1739,6 +1736,18 @@
 END_FUNCTION art_quick_deoptimize
 
     /*
+     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+                                                // Stack should be aligned now.
+    movq %gs:THREAD_SELF_OFFSET, %rdi           // Pass Thread.
+    call SYMBOL(artDeoptimizeFromCompiledCode)  // artDeoptimizeFromCompiledCode(Thread*)
+    UNREACHABLE
+END_FUNCTION art_quick_deoptimize_from_compiled_code
+
+    /*
      * String's compareTo.
      *
      * On entry:
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 56f7b35..e46402d 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -427,9 +427,16 @@
         self->ClearException();
         ShadowFrame* shadow_frame =
             self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
-        result->SetJ(self->PopDeoptimizationReturnValue().GetJ());
+        mirror::Throwable* pending_exception = nullptr;
+        self->PopDeoptimizationContext(result, &pending_exception);
         self->SetTopOfStack(nullptr);
         self->SetTopOfShadowStack(shadow_frame);
+
+        // Restore the exception that was pending before deoptimization, then interpret the
+        // deoptimized frames.
+        if (pending_exception != nullptr) {
+          self->SetException(pending_exception);
+        }
         interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, result);
       }
       if (kLogInvocationStartAndReturn) {
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 084c88e..5c1922e 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -89,7 +89,7 @@
             art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.card_table.
-#define THREAD_CARD_TABLE_OFFSET 136
+#define THREAD_CARD_TABLE_OFFSET 128
 ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET,
             art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value())
 
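
The 8-byte drop from 136 to 128 falls out of the thread.h change later in this diff: tls64_ loses
its JValue deoptimization_return_value field, so everything laid out after tls64_, including
tlsPtr_.card_table, shifts down by sizeof(JValue) == 8. A toy check of that arithmetic with
stand-in structs (the real Thread layout has many more fields):

    #include <cstdint>
    #include <cstdio>

    struct OldTls64 { uint64_t trace_clock_base; uint64_t deoptimization_return_value; };
    struct NewTls64 { uint64_t trace_clock_base; };

    int main() {
      // Fields packed after tls64_ move down by exactly this amount: 136 - 8 = 128.
      std::printf("%zu\n", sizeof(OldTls64) - sizeof(NewTls64));  // prints 8
      return 0;
    }
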
diff --git a/runtime/base/hash_set_test.cc b/runtime/base/hash_set_test.cc
index 4ef1f9e..6d2c5e0 100644
--- a/runtime/base/hash_set_test.cc
+++ b/runtime/base/hash_set_test.cc
@@ -17,9 +17,11 @@
 #include "hash_set.h"
 
+#include <forward_list>
 #include <map>
 #include <sstream>
 #include <string>
 #include <unordered_set>
+#include <vector>
 
 #include <gtest/gtest.h>
 #include "hash_map.h"
@@ -258,4 +260,59 @@
   ASSERT_EQ(it->second, 124);
 }
 
+struct IsEmptyFnVectorInt {
+  void MakeEmpty(std::vector<int>& item) const {
+    item.clear();
+  }
+  bool IsEmpty(const std::vector<int>& item) const {
+    return item.empty();
+  }
+};
+
+template <typename T>
+size_t HashIntSequence(T begin, T end) {
+  size_t hash = 0;
+  for (auto iter = begin; iter != end; ++iter) {
+    hash = hash * 2 + *iter;
+  }
+  return hash;
+}
+
+struct VectorIntHashEquals {
+  std::size_t operator()(const std::vector<int>& item) const {
+    return HashIntSequence(item.begin(), item.end());
+  }
+
+  std::size_t operator()(const std::forward_list<int>& item) const {
+    return HashIntSequence(item.begin(), item.end());
+  }
+
+  bool operator()(const std::vector<int>& a, const std::vector<int>& b) const {
+    return a == b;
+  }
+
+  bool operator()(const std::vector<int>& a, const std::forward_list<int>& b) const {
+    auto aiter = a.begin();
+    auto biter = b.begin();
+    while (aiter != a.end() && biter != b.end()) {
+      if (*aiter != *biter) {
+        return false;
+      }
+      aiter++;
+      biter++;
+    }
+    return (aiter == a.end() && biter == b.end());
+  }
+};
+
+TEST_F(HashSetTest, TestLookupByAlternateKeyType) {
+  HashSet<std::vector<int>, IsEmptyFnVectorInt, VectorIntHashEquals, VectorIntHashEquals> hash_set;
+  hash_set.Insert(std::vector<int>({1, 2, 3, 4}));
+  hash_set.Insert(std::vector<int>({4, 2}));
+  ASSERT_EQ(hash_set.end(), hash_set.Find(std::vector<int>({1, 1, 1, 1})));
+  ASSERT_NE(hash_set.end(), hash_set.Find(std::vector<int>({1, 2, 3, 4})));
+  ASSERT_EQ(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 1, 1, 1})));
+  ASSERT_NE(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 2, 3, 4})));
+}
+
 }  // namespace art
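
The new test looks up HashSet entries by an alternate key type (std::forward_list instead of
std::vector). The load-bearing requirement is that the functor hashes equal sequences identically
for both container types; HashIntSequence guarantees this by folding elements in order,
independent of the container. A minimal check of that property, independent of ART's HashSet:

    #include <cassert>
    #include <cstddef>
    #include <forward_list>
    #include <vector>

    // Same fold as the test's HashIntSequence: container-agnostic by construction.
    template <typename It>
    size_t HashIntSequence(It begin, It end) {
      size_t hash = 0;
      for (It it = begin; it != end; ++it) {
        hash = hash * 2 + *it;
      }
      return hash;
    }

    int main() {
      std::vector<int> v{1, 2, 3, 4};
      std::forward_list<int> l{1, 2, 3, 4};
      // If these ever diverged, Find() with the alternate key type would miss live entries.
      assert(HashIntSequence(v.begin(), v.end()) == HashIntSequence(l.begin(), l.end()));
      return 0;
    }
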
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 5f2caef..6bf203c 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -64,6 +64,7 @@
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
   kTransactionLogLock,
+  kMarkSweepMarkStackLock,
   kJniWeakGlobalsLock,
   kReferenceQueueSoftReferencesLock,
   kReferenceQueuePhantomReferencesLock,
@@ -80,7 +81,6 @@
   kArenaPoolLock,
   kDexFileMethodInlinerLock,
   kDexFileToMethodInlinerMapLock,
-  kMarkSweepMarkStackLock,
   kInternTableLock,
   kOatFileSecondaryLookupLock,
   kTracingUniqueMethodsLock,
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index a722fa8..e78914c 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1834,11 +1834,15 @@
     klass.Assign(AllocClass(self, SizeOfClassWithoutEmbeddedTables(dex_file, dex_class_def)));
   }
   if (UNLIKELY(klass.Get() == nullptr)) {
-    CHECK(self->IsExceptionPending());  // Expect an OOME.
+    self->AssertPendingOOMException();
     return nullptr;
   }
-  klass->SetDexCache(RegisterDexFile(dex_file));
-
+  mirror::DexCache* dex_cache = RegisterDexFile(dex_file);
+  if (dex_cache == nullptr) {
+    self->AssertPendingOOMException();
+    return nullptr;
+  }
+  klass->SetDexCache(dex_cache);
   SetupClass(dex_file, dex_class_def, klass, class_loader.Get());
 
   // Mark the string class by setting its access flag.
@@ -2498,7 +2502,7 @@
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, dex_lock_);
-    mirror::DexCache* dex_cache = FindDexCacheLocked(dex_file, true);
+    mirror::DexCache* dex_cache = FindDexCacheLocked(self, dex_file, true);
     if (dex_cache != nullptr) {
       return dex_cache;
     }
@@ -2508,13 +2512,15 @@
   // get to a suspend point.
   StackHandleScope<1> hs(self);
   Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(AllocDexCache(self, dex_file)));
-  CHECK(h_dex_cache.Get() != nullptr) << "Failed to allocate dex cache for "
-                                      << dex_file.GetLocation();
   WriterMutexLock mu(self, dex_lock_);
-  mirror::DexCache* dex_cache = FindDexCacheLocked(dex_file, true);
+  mirror::DexCache* dex_cache = FindDexCacheLocked(self, dex_file, true);
   if (dex_cache != nullptr) {
     return dex_cache;
   }
+  if (h_dex_cache.Get() == nullptr) {
+    self->AssertPendingOOMException();
+    return nullptr;
+  }
   RegisterDexFileLocked(dex_file, h_dex_cache);
   return h_dex_cache.Get();
 }
@@ -2525,32 +2531,33 @@
   RegisterDexFileLocked(dex_file, dex_cache);
 }
 
-mirror::DexCache* ClassLinker::FindDexCache(const DexFile& dex_file, bool allow_failure) {
-  Thread* const self = Thread::Current();
+mirror::DexCache* ClassLinker::FindDexCache(Thread* self,
+                                            const DexFile& dex_file,
+                                            bool allow_failure) {
   ReaderMutexLock mu(self, dex_lock_);
-  return FindDexCacheLocked(dex_file, allow_failure);
+  return FindDexCacheLocked(self, dex_file, allow_failure);
 }
 
-mirror::DexCache* ClassLinker::FindDexCacheLocked(const DexFile& dex_file, bool allow_failure) {
-  Thread* const self = Thread::Current();
+mirror::DexCache* ClassLinker::FindDexCacheLocked(Thread* self,
+                                                  const DexFile& dex_file,
+                                                  bool allow_failure) {
   // Search assuming unique-ness of dex file.
-  for (jobject weak_root : dex_caches_) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-    if (dex_cache != nullptr && dex_cache->GetDexFile() == &dex_file) {
-      return dex_cache;
-    }
-  }
-  // Check dex file by location, this is used for oatdump.
-  std::string location(dex_file.GetLocation());
-  for (jobject weak_root : dex_caches_) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-    if (dex_cache != nullptr && dex_cache->GetDexFile()->GetLocation() == location) {
-      return dex_cache;
+  JavaVMExt* const vm = self->GetJniEnv()->vm;
+  {
+    MutexLock mu(self, vm->WeakGlobalsLock());
+    for (jobject weak_root : dex_caches_) {
+      DCHECK_EQ(GetIndirectRefKind(weak_root), kWeakGlobal);
+      mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(
+          vm->DecodeWeakGlobalLocked(self, weak_root));
+      if (dex_cache != nullptr && dex_cache->GetDexFile() == &dex_file) {
+        return dex_cache;
+      }
     }
   }
   if (allow_failure) {
     return nullptr;
   }
+  std::string location(dex_file.GetLocation());
   // Failure, dump diagnostic and abort.
   for (jobject weak_root : dex_caches_) {
     mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index cc56e8b..2a7162b 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -309,7 +309,9 @@
   void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
       REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  mirror::DexCache* FindDexCache(const DexFile& dex_file, bool allow_failure = false)
+  mirror::DexCache* FindDexCache(Thread* self,
+                                 const DexFile& dex_file,
+                                 bool allow_failure = false)
       REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupDexCaches(ArtMethod* resolution_method)
       REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -559,7 +561,9 @@
 
   void RegisterDexFileLocked(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
       REQUIRES(dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
-  mirror::DexCache* FindDexCacheLocked(const DexFile& dex_file, bool allow_failure)
+  mirror::DexCache* FindDexCacheLocked(Thread* self,
+                                       const DexFile& dex_file,
+                                       bool allow_failure)
       REQUIRES(dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 0d1c875..c3191fa 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -355,7 +355,7 @@
     TestRootVisitor visitor;
     class_linker_->VisitRoots(&visitor, kVisitRootFlagAllRoots);
     // Verify the dex cache has resolution methods in all resolved method slots
-    mirror::DexCache* dex_cache = class_linker_->FindDexCache(dex);
+    mirror::DexCache* dex_cache = class_linker_->FindDexCache(Thread::Current(), dex);
     auto* resolved_methods = dex_cache->GetResolvedMethods();
     for (size_t i = 0; i < static_cast<size_t>(resolved_methods->GetLength()); i++) {
       EXPECT_TRUE(resolved_methods->GetElementPtrSize<ArtMethod*>(i, sizeof(void*)) != nullptr)
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 67099d7..8d34f5a 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -1191,6 +1191,10 @@
       if (error != JDWP::ERR_NONE) {
         return error;
       }
+      // Check if the object's type is compatible with the array's type.
+      if (o != nullptr && !o->InstanceOf(oa->GetClass()->GetComponentType())) {
+        return JDWP::ERR_TYPE_MISMATCH;
+      }
       oa->Set<false>(offset + i, o);
     }
   }
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index a4feac1..d749664 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -28,17 +28,30 @@
 
 namespace art {
 
-extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
-  ScopedQuickEntrypointChecks sqec(self);
-
+NO_RETURN static void artDeoptimizeImpl(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
   if (VLOG_IS_ON(deopt)) {
     LOG(INFO) << "Deopting:";
     self->Dump(LOG(INFO));
   }
 
-  self->PushAndClearDeoptimizationReturnValue();
+  self->AssertHasDeoptimizationContext();
   self->SetException(Thread::GetDeoptimizationException());
   self->QuickDeliverException();
 }
 
+extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  artDeoptimizeImpl(self);
+}
+
+extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  // Before deoptimizing to interpreter, we must push the deoptimization context.
+  JValue return_value;
+  return_value.SetJ(0);  // We never deoptimize from compiled code with an invoke result.
+  self->PushDeoptimizationContext(return_value, false, self->GetException());
+  artDeoptimizeImpl(self);
+}
+
 }  // namespace art
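
The new stub, the trampoline change below, and the Thread plumbing at the end of this diff form
one protocol: whichever path requests deoptimization pushes a context record (return value,
whether it is a reference, and the pending exception), and the upcall in ArtMethod::Invoke pops it
and restores the exception before re-entering the interpreter. A simplified, single-threaded model
of that push/pop stack, with stand-in types in place of ART's Thread and mirror::Throwable:

    #include <cassert>
    #include <cstdint>

    struct Throwable {};  // stand-in for mirror::Throwable

    // Mirrors DeoptimizationContextRecord: a singly linked LIFO, so nested
    // deoptimizations unwind in the right order.
    struct ContextRecord {
      int64_t ret_val;
      bool is_reference;
      Throwable* pending_exception;
      ContextRecord* link;
    };

    static ContextRecord* context_stack = nullptr;

    void PushDeoptimizationContext(int64_t ret, bool is_ref, Throwable* exception) {
      context_stack = new ContextRecord{ret, is_ref, exception, context_stack};
    }

    void PopDeoptimizationContext(int64_t* ret, Throwable** exception) {
      assert(context_stack != nullptr);  // AssertHasDeoptimizationContext() in the real code
      ContextRecord* record = context_stack;
      context_stack = record->link;
      *ret = record->ret_val;
      *exception = record->pending_exception;
      delete record;
    }

    int main() {
      Throwable pending;
      PushDeoptimizationContext(0, false, &pending);  // artDeoptimizeFromCompiledCode side
      int64_t ret;
      Throwable* exception;
      PopDeoptimizationContext(&ret, &exception);     // ArtMethod::Invoke side
      assert(ret == 0 && exception == &pending);      // exception restored before interpreting
      return 0;
    }
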
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index ad5ee84..8e660a2 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -51,6 +51,9 @@
                                                               uint64_t gpr_result,
                                                               uint64_t fpr_result)
     SHARED_REQUIRES(Locks::mutator_lock_) {
+  // Instrumentation exit stub must not be entered with a pending exception.
+  CHECK(!self->IsExceptionPending()) << "Enter instrumentation exit stub with pending exception "
+                                     << self->GetException()->Dump();
   // Compute address of return PC and sanity check that it currently holds 0.
   size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsOnly);
   uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index aa35ec1..0c7caf3 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -688,8 +688,12 @@
     // Request a stack deoptimization if needed
     ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
     if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
+      // Push the context onto the deoptimization stack so we can restore the return value and
+      // the exception before executing the deoptimized frames.
+      self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
+
+      // Set special exception to cause deoptimization.
       self->SetException(Thread::GetDeoptimizationException());
-      self->SetDeoptimizationReturnValue(result, shorty[0] == 'L');
     }
 
     // No need to restore the args since the method has already been run by the interpreter.
diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h
index 8209dc8..2842c5a 100644
--- a/runtime/entrypoints/runtime_asm_entrypoints.h
+++ b/runtime/entrypoints/runtime_asm_entrypoints.h
@@ -70,7 +70,8 @@
   return reinterpret_cast<const void*>(art_quick_instrumentation_entry);
 }
 
-extern "C" void art_quick_deoptimize_from_compiled_slow_path();
+// Stub to deoptimize from compiled code.
+extern "C" void art_quick_deoptimize_from_compiled_code();
 
 // The return_pc of instrumentation exit stub.
 extern "C" void art_quick_instrumentation_exit();
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index f7a3cd5..7db8888 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -72,15 +72,12 @@
     EXPECT_OFFSET_DIFFP(Thread, tls32_, throwing_OutOfMemoryError, no_thread_suspension, 4);
     EXPECT_OFFSET_DIFFP(Thread, tls32_, no_thread_suspension, thread_exit_check_count, 4);
     EXPECT_OFFSET_DIFFP(Thread, tls32_, thread_exit_check_count, handling_signal_, 4);
-    EXPECT_OFFSET_DIFFP(Thread, tls32_, handling_signal_,
-                        deoptimization_return_value_is_reference, 4);
 
     // TODO: Better connection. Take alignment into account.
     EXPECT_OFFSET_DIFF_GT3(Thread, tls32_.thread_exit_check_count, tls64_.trace_clock_base, 4,
                            thread_tls32_to_tls64);
 
-    EXPECT_OFFSET_DIFFP(Thread, tls64_, trace_clock_base, deoptimization_return_value, 8);
-    EXPECT_OFFSET_DIFFP(Thread, tls64_, deoptimization_return_value, stats, 8);
+    EXPECT_OFFSET_DIFFP(Thread, tls64_, trace_clock_base, stats, 8);
 
     // TODO: Better connection. Take alignment into account.
     EXPECT_OFFSET_DIFF_GT3(Thread, tls64_.stats, tlsPtr_.card_table, 8, thread_tls64_to_tlsptr);
@@ -108,8 +105,8 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, single_step_control, stacked_shadow_frame_record,
                         sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, stacked_shadow_frame_record,
-                        deoptimization_return_value_stack, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, deoptimization_return_value_stack, name, sizeof(void*));
+                        deoptimization_context_stack, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, deoptimization_context_stack, name, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, name, pthread_self, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, pthread_self, last_no_thread_suspension_cause,
                         sizeof(void*));
diff --git a/runtime/gc/reference_queue_test.cc b/runtime/gc/reference_queue_test.cc
index 888c0d2..ab921d9 100644
--- a/runtime/gc/reference_queue_test.cc
+++ b/runtime/gc/reference_queue_test.cc
@@ -27,11 +27,11 @@
 
 TEST_F(ReferenceQueueTest, EnqueueDequeue) {
   Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
   StackHandleScope<20> hs(self);
   Mutex lock("Reference queue lock");
   ReferenceQueue queue(&lock);
   ASSERT_TRUE(queue.IsEmpty());
-  ScopedObjectAccess soa(self);
   ASSERT_EQ(queue.GetLength(), 0U);
   auto ref_class = hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindClass(self, "Ljava/lang/ref/WeakReference;",
@@ -58,10 +58,10 @@
 
 TEST_F(ReferenceQueueTest, Dump) {
   Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
   StackHandleScope<20> hs(self);
   Mutex lock("Reference queue lock");
   ReferenceQueue queue(&lock);
-  ScopedObjectAccess soa(self);
   queue.Dump(LOG(INFO));
   auto weak_ref_class = hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindClass(self, "Ljava/lang/ref/WeakReference;",
diff --git a/runtime/handle_scope-inl.h b/runtime/handle_scope-inl.h
index 222083b..ca206ef 100644
--- a/runtime/handle_scope-inl.h
+++ b/runtime/handle_scope-inl.h
@@ -19,8 +19,9 @@
 
 #include "handle_scope.h"
 
+#include "base/mutex.h"
 #include "handle.h"
-#include "thread.h"
+#include "thread-inl.h"
 #include "verify_object-inl.h"
 
 namespace art {
@@ -29,6 +30,9 @@
 inline StackHandleScope<kNumReferences>::StackHandleScope(Thread* self, mirror::Object* fill_value)
     : HandleScope(self->GetTopHandleScope(), kNumReferences), self_(self), pos_(0) {
   DCHECK_EQ(self, Thread::Current());
+  if (kDebugLocking) {
+    Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+  }
   static_assert(kNumReferences >= 1, "StackHandleScope must contain at least 1 reference");
   // TODO: Figure out how to use a compile assert.
   CHECK_EQ(&storage_[0], GetReferences());
@@ -42,6 +46,9 @@
 inline StackHandleScope<kNumReferences>::~StackHandleScope() {
   HandleScope* top_handle_scope = self_->PopHandleScope();
   DCHECK_EQ(top_handle_scope, this);
+  if (kDebugLocking) {
+    Locks::mutator_lock_->AssertSharedHeld(self_);
+  }
 }
 
 inline size_t HandleScope::SizeOf(uint32_t num_references) {
@@ -59,6 +66,9 @@
 
 inline mirror::Object* HandleScope::GetReference(size_t i) const {
   DCHECK_LT(i, number_of_references_);
+  if (kDebugLocking) {
+    Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+  }
   return GetReferences()[i].AsMirrorPtr();
 }
 
@@ -73,6 +83,9 @@
 }
 
 inline void HandleScope::SetReference(size_t i, mirror::Object* object) {
+  if (kDebugLocking) {
+    Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+  }
   DCHECK_LT(i, number_of_references_);
   GetReferences()[i].Assign(object);
 }
@@ -104,6 +117,9 @@
 
 template<size_t kNumReferences>
 inline void StackHandleScope<kNumReferences>::SetReference(size_t i, mirror::Object* object) {
+  if (kDebugLocking) {
+    Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+  }
   DCHECK_LT(i, kNumReferences);
   VerifyObject(object);
   GetReferences()[i].Assign(object);
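
The assertions added throughout handle_scope-inl.h follow a recurring ART idiom: gate lock checks
behind the compile-time kDebugLocking flag so release builds fold them away entirely. The shape of
the idiom, sketched with an illustrative lock type rather than ART's annotated Mutex:

    #include <cassert>

    constexpr bool kDebugLocking = true;  // assumed false in release configurations

    struct SharedLock {
      int readers = 0;
      void AssertSharedHeld() const { assert(readers > 0); }
    };

    SharedLock mutator_lock;

    int ReadGuardedValue(const int* ref) {
      if (kDebugLocking) {             // constant-folds to nothing when kDebugLocking is false
        mutator_lock.AssertSharedHeld();
      }
      return *ref;
    }

    int main() {
      mutator_lock.readers = 1;        // simulate holding the lock in shared mode
      int value = 42;
      return ReadGuardedValue(&value) == 42 ? 0 : 1;
    }
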
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index ee6b020..e2094dc 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -765,8 +765,9 @@
       okay = !file_output.Errors();
 
       if (okay) {
-        // Check for expected size.
-        CHECK_EQ(file_output.SumLength(), overall_size);
+        // Check for expected size. Output is expected to be less than or equal to the first
+        // phase, see b/23521263.
+        DCHECK_LE(file_output.SumLength(), overall_size);
       }
       output_ = nullptr;
     }
@@ -810,8 +811,8 @@
     // Write the dump.
     ProcessHeap(true);
 
-    // Check for expected size.
-    CHECK_EQ(net_output.SumLength(), overall_size + kChunkHeaderSize);
+    // Check for expected size. See the comment in DumpToFile.
+    DCHECK_LE(net_output.SumLength(), overall_size + kChunkHeaderSize);
     output_ = nullptr;
 
     return true;
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index e28d578..63c02ed 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1016,7 +1016,8 @@
                                 PrettyMethod(method).c_str(),
                                 return_value.GetJ()) << *self;
     }
-    self->SetDeoptimizationReturnValue(return_value, return_shorty == 'L');
+    self->PushDeoptimizationContext(return_value, return_shorty == 'L',
+                                    nullptr /* no pending exception */);
     return GetTwoWordSuccessValue(*return_pc,
                                   reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint()));
   } else {
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 2fd0517..ef7a924 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -578,6 +578,13 @@
 
 mirror::Object* JavaVMExt::DecodeWeakGlobal(Thread* self, IndirectRef ref) {
   MutexLock mu(self, weak_globals_lock_);
+  return DecodeWeakGlobalLocked(self, ref);
+}
+
+mirror::Object* JavaVMExt::DecodeWeakGlobalLocked(Thread* self, IndirectRef ref) {
+  if (kDebugLocking) {
+    weak_globals_lock_.AssertHeld(self);
+  }
   while (UNLIKELY((!kUseReadBarrier && !allow_new_weak_globals_) ||
                   (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
     weak_globals_add_condition_.WaitHoldingLocks(self);
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index d70fc47..e80266f 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -133,7 +133,16 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!globals_lock_);
 
   mirror::Object* DecodeWeakGlobal(Thread* self, IndirectRef ref)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!weak_globals_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!weak_globals_lock_);
+
+  mirror::Object* DecodeWeakGlobalLocked(Thread* self, IndirectRef ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(weak_globals_lock_);
+
+  Mutex& WeakGlobalsLock() RETURN_CAPABILITY(weak_globals_lock_) {
+    return weak_globals_lock_;
+  }
 
   void UpdateWeakGlobal(Thread* self, IndirectRef ref, mirror::Object* result)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!weak_globals_lock_);
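
DecodeWeakGlobalLocked plus the exposed WeakGlobalsLock() let a caller take the lock once and scan
many weak roots under it, which is exactly how the class_linker.cc hunk above iterates
dex_caches_. The shape of the pattern, sketched with std::mutex standing in for ART's annotated
Mutex (the real code also carries thread-safety attributes such as RETURN_CAPABILITY):

    #include <mutex>
    #include <vector>

    class Vm {
     public:
      // Unlocked convenience wrapper: acquires the lock, then delegates.
      int DecodeWeakGlobal(int ref) {
        std::lock_guard<std::mutex> mu(weak_globals_lock_);
        return DecodeWeakGlobalLocked(ref);
      }
      // Caller must already hold WeakGlobalsLock(); cheap to call in a loop.
      int DecodeWeakGlobalLocked(int ref) { return ref * 2; }  // illustrative body
      std::mutex& WeakGlobalsLock() { return weak_globals_lock_; }

     private:
      std::mutex weak_globals_lock_;
    };

    int main() {
      Vm vm;
      std::vector<int> refs{1, 2, 3};
      int sum = 0;
      std::lock_guard<std::mutex> mu(vm.WeakGlobalsLock());  // one acquisition for the whole scan
      for (int ref : refs) {
        sum += vm.DecodeWeakGlobalLocked(ref);
      }
      return sum == 12 ? 0 : 1;
    }
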
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index eda6c9b..28a830d 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -146,8 +146,8 @@
 inline size_t String::SizeOf() {
   size_t size = sizeof(String) + (sizeof(uint16_t) * GetLength<kVerifyFlags>());
   // String.equals() intrinsics assume zero-padding up to kObjectAlignment,
-  // so make sure the padding is actually zero-initialized if the allocator
-  // chooses to clear, or GC compaction chooses to copy, only SizeOf() bytes.
+  // so make sure the zero-padding is actually copied around if GC compaction
+  // chooses to copy only SizeOf() bytes.
   // http://b/23528461
   return RoundUp(size, kObjectAlignment);
 }
@@ -155,21 +155,35 @@
 template <bool kIsInstrumented, typename PreFenceVisitor>
 inline String* String::Alloc(Thread* self, int32_t utf16_length, gc::AllocatorType allocator_type,
                              const PreFenceVisitor& pre_fence_visitor) {
-  size_t header_size = sizeof(String);
-  size_t data_size = sizeof(uint16_t) * utf16_length;
+  constexpr size_t header_size = sizeof(String);
+  static_assert(sizeof(utf16_length) <= sizeof(size_t),
+                "static_cast<size_t>(utf16_length) must not lose bits.");
+  size_t length = static_cast<size_t>(utf16_length);
+  size_t data_size = sizeof(uint16_t) * length;
   size_t size = header_size + data_size;
+  // String.equals() intrinsics assume zero-padding up to kObjectAlignment,
+  // so make sure the allocator clears the padding as well.
+  // http://b/23528461
+  size_t alloc_size = RoundUp(size, kObjectAlignment);
   Class* string_class = GetJavaLangString();
 
   // Check for overflow and throw OutOfMemoryError if this was an unreasonable request.
-  if (UNLIKELY(size < data_size)) {
+  // Do this by comparing with the maximum length that will _not_ cause an overflow.
+  constexpr size_t overflow_length = (-header_size) / sizeof(uint16_t);  // Unsigned arithmetic.
+  constexpr size_t max_alloc_length = overflow_length - 1u;
+  static_assert(IsAligned<sizeof(uint16_t)>(kObjectAlignment),
+                "kObjectAlignment must be at least as big as Java char alignment");
+  constexpr size_t max_length = RoundDown(max_alloc_length, kObjectAlignment / sizeof(uint16_t));
+  if (UNLIKELY(length > max_length)) {
     self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow",
                                              PrettyDescriptor(string_class).c_str(),
                                              utf16_length).c_str());
     return nullptr;
   }
+
   gc::Heap* heap = Runtime::Current()->GetHeap();
   return down_cast<String*>(
-      heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, string_class, size,
+      heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, string_class, alloc_size,
                                                             allocator_type, pre_fence_visitor));
 }
 
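
The new overflow bound in String::Alloc can be sanity-checked with concrete 32-bit numbers. Below,
uint32_t stands in for a 32-bit size_t and header_size is assumed to be 16 (the real value is
sizeof(String)): overflow_length is the first length whose byte size wraps, and rounding
max_alloc_length down to a whole number of kObjectAlignment/2 chars keeps the final RoundUp from
wrapping either:

    #include <cstdint>

    int main() {
      using u32 = uint32_t;                    // stand-in for a 32-bit size_t
      constexpr u32 header_size = 16;          // assumption for the sketch; really sizeof(String)
      constexpr u32 kObjectAlignment = 8;

      constexpr u32 overflow_length = (0u - header_size) / sizeof(uint16_t);  // 2147483640
      constexpr u32 max_alloc_length = overflow_length - 1u;
      constexpr u32 chars_per_alignment = kObjectAlignment / sizeof(uint16_t);
      constexpr u32 max_length =
          max_alloc_length / chars_per_alignment * chars_per_alignment;       // RoundDown

      // At overflow_length the size computation wraps to 0 (unsigned wrap is well defined)...
      static_assert(header_size + 2u * overflow_length == 0u, "wraps exactly at the bound");
      // ...while at max_length both the raw size and its aligned round-up still fit.
      constexpr u32 size = header_size + 2u * max_length;                     // 0xFFFFFFF8
      constexpr u32 aligned = (size + kObjectAlignment - 1u) & ~(kObjectAlignment - 1u);
      static_assert(aligned >= size, "RoundUp does not wrap");
      return 0;
    }
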
diff --git a/runtime/monitor_test.cc b/runtime/monitor_test.cc
index e1173bb..69112b1 100644
--- a/runtime/monitor_test.cc
+++ b/runtime/monitor_test.cc
@@ -290,15 +290,13 @@
 static void CommonWaitSetup(MonitorTest* test, ClassLinker* class_linker, uint64_t create_sleep,
                             int64_t c_millis, bool c_expected, bool interrupt, uint64_t use_sleep,
                             int64_t u_millis, bool u_expected, const char* pool_name) {
+  Thread* const self = Thread::Current();
+  ScopedObjectAccess soa(self);
   // First create the object we lock. String is easiest.
-  StackHandleScope<3> hs(Thread::Current());
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    test->object_ = hs.NewHandle(mirror::String::AllocFromModifiedUtf8(Thread::Current(),
-                                                                       "hello, world!"));
-    test->watchdog_object_ = hs.NewHandle(mirror::String::AllocFromModifiedUtf8(Thread::Current(),
-                                                                                "hello, world!"));
-  }
+  StackHandleScope<3> hs(soa.Self());
+  test->object_ = hs.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "hello, world!"));
+  test->watchdog_object_ = hs.NewHandle(mirror::String::AllocFromModifiedUtf8(self,
+                                                                              "hello, world!"));
 
   // Create the barrier used to synchronize.
   test->barrier_ = std::unique_ptr<Barrier>(new Barrier(2));
@@ -308,23 +306,17 @@
   // Fill the heap.
   std::unique_ptr<StackHandleScope<kMaxHandles>> hsp;
   std::vector<MutableHandle<mirror::Object>> handles;
-  {
-    Thread* self = Thread::Current();
-    ScopedObjectAccess soa(self);
 
-    // Our job: Fill the heap, then try Wait.
-    FillHeap(self, class_linker, &hsp, &handles);
+  // Our job: Fill the heap, then try Wait.
+  FillHeap(soa.Self(), class_linker, &hsp, &handles);
 
-    // Now release everything.
-    auto it = handles.begin();
-    auto end = handles.end();
+  // Now release everything.
+  for (MutableHandle<mirror::Object>& h : handles) {
+    h.Assign(nullptr);
+  }
 
-    for ( ; it != end; ++it) {
-      it->Assign(nullptr);
-    }
-  }  // Need to drop the mutator lock to allow barriers.
-
-  Thread* self = Thread::Current();
+  // Need to drop the mutator lock to allow barriers.
+  soa.Self()->TransitionFromRunnableToSuspended(kNative);
   ThreadPool thread_pool(pool_name, 3);
   thread_pool.AddTask(self, new CreateTask(test, create_sleep, c_millis, c_expected));
   if (interrupt) {
@@ -336,19 +328,19 @@
   thread_pool.StartWorkers(self);
 
   // Wait on completion barrier.
-  test->complete_barrier_->Wait(Thread::Current());
+  test->complete_barrier_->Wait(self);
   test->completed_ = true;
 
   // Wake the watchdog.
   {
-    ScopedObjectAccess soa(Thread::Current());
-
+    ScopedObjectAccess soa2(self);
     test->watchdog_object_.Get()->MonitorEnter(self);     // Lock the object.
     test->watchdog_object_.Get()->NotifyAll(self);        // Wake up waiting parties.
     test->watchdog_object_.Get()->MonitorExit(self);      // Release the lock.
   }
 
   thread_pool.StopWorkers(self);
+  soa.Self()->TransitionFromSuspendedToRunnable();
 }
 
 
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 9bd320c..3b84bfa 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -171,7 +171,7 @@
     if (array == nullptr) {
       ScopedObjectAccess soa(env);
       for (auto& dex_file : dex_files) {
-        if (Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file, true) != nullptr) {
+        if (linker->FindDexCache(soa.Self(), *dex_file, true) != nullptr) {
           dex_file.release();
         }
       }
@@ -208,8 +208,9 @@
   //
   // TODO: The Runtime should support unloading of classes and freeing of the
   // dex files for those unloaded classes rather than leaking dex files here.
-  for (auto& dex_file : *dex_files) {
-    if (Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file, true) == nullptr) {
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  for (const DexFile* dex_file : *dex_files) {
+    if (class_linker->FindDexCache(soa.Self(), *dex_file, true) == nullptr) {
       delete dex_file;
     }
   }
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 5a9c43b..4f95723 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -428,9 +428,10 @@
     return;
   }
   ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  Thread* const self = Thread::Current();
   for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
     CHECK(dex_file != nullptr);
-    mirror::DexCache* const dex_cache = class_linker->FindDexCache(*dex_file, true);
+    mirror::DexCache* const dex_cache = class_linker->FindDexCache(self, *dex_file, true);
     // If dex cache was deallocated, just continue.
     if (dex_cache == nullptr) {
       continue;
diff --git a/runtime/oat.h b/runtime/oat.h
index 29dd76c..1520a9b 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '6', '8', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '6', '9', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 431ba95..8f2ca30 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -236,6 +236,9 @@
     self->GetJniEnv()->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
                                             WellKnownClasses::java_lang_Daemons_stop);
   }
+
+  Trace::Shutdown();
+
   if (attach_shutdown_thread) {
     DetachCurrentThread();
     self = nullptr;
@@ -246,8 +249,6 @@
     BackgroundMethodSamplingProfiler::Shutdown();
   }
 
-  Trace::Shutdown();
-
   // Make sure to let the GC complete if it is running.
   heap_->WaitForGcToComplete(gc::kGcCauseBackground, self);
   heap_->DeleteThreadPool();
diff --git a/runtime/thread.cc b/runtime/thread.cc
index a33e150..63534b1 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -162,27 +162,41 @@
   ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints);
 }
 
-class DeoptimizationReturnValueRecord {
+class DeoptimizationContextRecord {
  public:
-  DeoptimizationReturnValueRecord(const JValue& ret_val,
-                                  bool is_reference,
-                                  DeoptimizationReturnValueRecord* link)
-      : ret_val_(ret_val), is_reference_(is_reference), link_(link) {}
+  DeoptimizationContextRecord(const JValue& ret_val, bool is_reference,
+                              mirror::Throwable* pending_exception,
+                              DeoptimizationContextRecord* link)
+      : ret_val_(ret_val), is_reference_(is_reference), pending_exception_(pending_exception),
+        link_(link) {}
 
   JValue GetReturnValue() const { return ret_val_; }
   bool IsReference() const { return is_reference_; }
-  DeoptimizationReturnValueRecord* GetLink() const { return link_; }
-  mirror::Object** GetGCRoot() {
+  mirror::Throwable* GetPendingException() const { return pending_exception_; }
+  DeoptimizationContextRecord* GetLink() const { return link_; }
+  mirror::Object** GetReturnValueAsGCRoot() {
     DCHECK(is_reference_);
     return ret_val_.GetGCRoot();
   }
+  mirror::Object** GetPendingExceptionAsGCRoot() {
+    return reinterpret_cast<mirror::Object**>(&pending_exception_);
+  }
 
  private:
+  // The value returned by the method at the top of the stack before deoptimization.
   JValue ret_val_;
-  const bool is_reference_;
-  DeoptimizationReturnValueRecord* const link_;
 
-  DISALLOW_COPY_AND_ASSIGN(DeoptimizationReturnValueRecord);
+  // Indicates whether the returned value is a reference. If so, the GC will visit it.
+  const bool is_reference_;
+
+  // The exception that was pending before deoptimization (or null if there was no pending
+  // exception).
+  mirror::Throwable* pending_exception_;
+
+  // A link to the previous DeoptimizationContextRecord.
+  DeoptimizationContextRecord* const link_;
+
+  DISALLOW_COPY_AND_ASSIGN(DeoptimizationContextRecord);
 };
 
 class StackedShadowFrameRecord {
@@ -206,22 +220,28 @@
   DISALLOW_COPY_AND_ASSIGN(StackedShadowFrameRecord);
 };
 
-void Thread::PushAndClearDeoptimizationReturnValue() {
-  DeoptimizationReturnValueRecord* record = new DeoptimizationReturnValueRecord(
-      tls64_.deoptimization_return_value,
-      tls32_.deoptimization_return_value_is_reference,
-      tlsPtr_.deoptimization_return_value_stack);
-  tlsPtr_.deoptimization_return_value_stack = record;
-  ClearDeoptimizationReturnValue();
+void Thread::PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+                                       mirror::Throwable* exception) {
+  DeoptimizationContextRecord* record = new DeoptimizationContextRecord(
+      return_value,
+      is_reference,
+      exception,
+      tlsPtr_.deoptimization_context_stack);
+  tlsPtr_.deoptimization_context_stack = record;
 }
 
-JValue Thread::PopDeoptimizationReturnValue() {
-  DeoptimizationReturnValueRecord* record = tlsPtr_.deoptimization_return_value_stack;
-  DCHECK(record != nullptr);
-  tlsPtr_.deoptimization_return_value_stack = record->GetLink();
-  JValue ret_val(record->GetReturnValue());
+void Thread::PopDeoptimizationContext(JValue* result, mirror::Throwable** exception) {
+  AssertHasDeoptimizationContext();
+  DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack;
+  tlsPtr_.deoptimization_context_stack = record->GetLink();
+  result->SetJ(record->GetReturnValue().GetJ());
+  *exception = record->GetPendingException();
   delete record;
-  return ret_val;
+}
+
+void Thread::AssertHasDeoptimizationContext() {
+  CHECK(tlsPtr_.deoptimization_context_stack != nullptr)
+      << "No deoptimization context for thread " << *this;
 }
 
 void Thread::PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type) {
@@ -1575,6 +1595,9 @@
   CHECK(tlsPtr_.flip_function == nullptr);
   CHECK_EQ(tls32_.suspended_at_suspend_check, false);
 
+  // Make sure we processed all deoptimization requests.
+  CHECK(tlsPtr_.deoptimization_context_stack == nullptr) << "Missed deoptimization";
+
   // We may be deleting a still born thread.
   SetStateUnsafe(kTerminated);
 
@@ -2593,7 +2616,7 @@
   visitor->VisitRootIfNonNull(&tlsPtr_.opeer, RootInfo(kRootThreadObject, thread_id));
   if (tlsPtr_.exception != nullptr && tlsPtr_.exception != GetDeoptimizationException()) {
     visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&tlsPtr_.exception),
-                   RootInfo(kRootNativeStack, thread_id));
+                       RootInfo(kRootNativeStack, thread_id));
   }
   visitor->VisitRootIfNonNull(&tlsPtr_.monitor_enter_object, RootInfo(kRootNativeStack, thread_id));
   tlsPtr_.jni_env->locals.VisitRoots(visitor, RootInfo(kRootJNILocal, thread_id));
@@ -2602,6 +2625,7 @@
   if (tlsPtr_.debug_invoke_req != nullptr) {
     tlsPtr_.debug_invoke_req->VisitRoots(visitor, RootInfo(kRootDebugger, thread_id));
   }
+  // Visit roots for deoptimization.
   if (tlsPtr_.stacked_shadow_frame_record != nullptr) {
     RootCallbackVisitor visitor_to_callback(visitor, thread_id);
     ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitor_to_callback);
@@ -2615,14 +2639,16 @@
       }
     }
   }
-  if (tlsPtr_.deoptimization_return_value_stack != nullptr) {
-    for (DeoptimizationReturnValueRecord* record = tlsPtr_.deoptimization_return_value_stack;
+  if (tlsPtr_.deoptimization_context_stack != nullptr) {
+    for (DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack;
          record != nullptr;
          record = record->GetLink()) {
       if (record->IsReference()) {
-        visitor->VisitRootIfNonNull(record->GetGCRoot(),
-            RootInfo(kRootThreadObject, thread_id));
+        visitor->VisitRootIfNonNull(record->GetReturnValueAsGCRoot(),
+                                    RootInfo(kRootThreadObject, thread_id));
       }
+      visitor->VisitRootIfNonNull(record->GetPendingExceptionAsGCRoot(),
+                                  RootInfo(kRootThreadObject, thread_id));
     }
   }
   for (auto* verifier = tlsPtr_.method_verifier; verifier != nullptr; verifier = verifier->link_) {
diff --git a/runtime/thread.h b/runtime/thread.h
index 9bb57bf..2d450f5 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -77,7 +77,7 @@
 class Closure;
 class Context;
 struct DebugInvokeReq;
-class DeoptimizationReturnValueRecord;
+class DeoptimizationContextRecord;
 class DexFile;
 class JavaVMExt;
 struct JNIEnvExt;
@@ -830,19 +830,13 @@
   // and execute Java code, so there might be nested deoptimizations happening.
   // We need to save the ongoing deoptimization shadow frames and return
   // values on stacks.
-  void SetDeoptimizationReturnValue(const JValue& ret_val, bool is_reference) {
-    tls64_.deoptimization_return_value.SetJ(ret_val.GetJ());
-    tls32_.deoptimization_return_value_is_reference = is_reference;
-  }
-  bool IsDeoptimizationReturnValueReference() {
-    return tls32_.deoptimization_return_value_is_reference;
-  }
-  void ClearDeoptimizationReturnValue() {
-    tls64_.deoptimization_return_value.SetJ(0);
-    tls32_.deoptimization_return_value_is_reference = false;
-  }
-  void PushAndClearDeoptimizationReturnValue();
-  JValue PopDeoptimizationReturnValue();
+  void PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+                                 mirror::Throwable* exception)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  void AssertHasDeoptimizationContext()
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type);
   ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type);
 
@@ -1102,9 +1096,8 @@
       suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
       thread_exit_check_count(0), handling_signal_(false),
-      deoptimization_return_value_is_reference(false), suspended_at_suspend_check(false),
-      ready_for_debug_invoke(false), debug_method_entry_(false), is_gc_marking(false),
-      weak_ref_access_enabled(true) {
+      suspended_at_suspend_check(false), ready_for_debug_invoke(false),
+      debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true) {
     }
 
     union StateAndFlags state_and_flags;
@@ -1144,10 +1137,6 @@
     // True if signal is being handled by this thread.
     bool32_t handling_signal_;
 
-    // True if the return value for interpreter after deoptimization is a reference.
-    // For gc purpose.
-    bool32_t deoptimization_return_value_is_reference;
-
     // True if the thread is suspended in FullSuspendCheck(). This is
     // used to distinguish runnable threads that are suspended due to
     // a normal suspend check from other threads.
@@ -1178,15 +1167,12 @@
   } tls32_;
 
   struct PACKED(8) tls_64bit_sized_values {
-    tls_64bit_sized_values() : trace_clock_base(0), deoptimization_return_value() {
+    tls_64bit_sized_values() : trace_clock_base(0) {
     }
 
     // The clock base used for tracing.
     uint64_t trace_clock_base;
 
-    // Return value used by deoptimization.
-    JValue deoptimization_return_value;
-
     RuntimeStats stats;
   } tls64_;
 
@@ -1197,7 +1183,7 @@
       stack_trace_sample(nullptr), wait_next(nullptr), monitor_enter_object(nullptr),
       top_handle_scope(nullptr), class_loader_override(nullptr), long_jump_context(nullptr),
       instrumentation_stack(nullptr), debug_invoke_req(nullptr), single_step_control(nullptr),
-      stacked_shadow_frame_record(nullptr), deoptimization_return_value_stack(nullptr),
+      stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr),
       name(nullptr), pthread_self(0),
       last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
       thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0),
@@ -1282,7 +1268,7 @@
     StackedShadowFrameRecord* stacked_shadow_frame_record;
 
-    // Deoptimization return value record stack.
+    // Deoptimization context record stack.
-    DeoptimizationReturnValueRecord* deoptimization_return_value_stack;
+    DeoptimizationContextRecord* deoptimization_context_stack;
 
     // A cached copy of the java.lang.Thread's name.
     std::string* name;
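
The thread.h hunk above replaces the single per-thread return value with a
linked stack of per-deoptimization records, so nested deoptimizations each
keep their own return value and the exception that was pending when they
started. A minimal Java sketch of that push/pop discipline, for illustration
only (DeoptContext, its field names, and Throwable standing in for
mirror::Throwable are hypothetical, not ART's actual types):

    // One record per ongoing deoptimization: the interpreter's return
    // value, whether that value is a reference (so the GC must visit it
    // as a root), and the pending exception to restore afterwards.
    final class DeoptContext {
        final long returnValueBits;       // stand-in for ART's JValue
        final boolean isReference;        // return value needs GC visiting
        final Throwable pendingException; // may be null
        final DeoptContext link;          // next older record

        DeoptContext(long returnValueBits, boolean isReference,
                     Throwable pendingException, DeoptContext link) {
            this.returnValueBits = returnValueBits;
            this.isReference = isReference;
            this.pendingException = pendingException;
            this.link = link;
        }
    }

    // Singly linked stack, mirroring Push/PopDeoptimizationContext.
    final class DeoptContextStack {
        private DeoptContext top;

        void push(long returnValueBits, boolean isReference, Throwable pending) {
            top = new DeoptContext(returnValueBits, isReference, pending, top);
        }

        DeoptContext pop() {
            if (top == null) {  // analogous to AssertHasDeoptimizationContext()
                throw new IllegalStateException("no deoptimization context");
            }
            DeoptContext record = top;
            top = record.link;
            return record;
        }
    }
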
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 7579d8d..4ab5c0e 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -638,10 +638,11 @@
     const std::map<const DexFile*, DexIndexBitSet*>& seen_methods,
     std::set<ArtMethod*>* visited_methods) SHARED_REQUIRES(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  Thread* const self = Thread::Current();
   for (auto& e : seen_methods) {
     DexIndexBitSet* bit_set = e.second;
     // TODO: Visit trace methods as roots.
-    mirror::DexCache* dex_cache = class_linker->FindDexCache(*e.first, false);
+    mirror::DexCache* dex_cache = class_linker->FindDexCache(self, *e.first, false);
     for (uint32_t i = 0; i < bit_set->size(); ++i) {
       if ((*bit_set)[i]) {
         visited_methods->insert(dex_cache->GetResolvedMethod(i, sizeof(void*)));
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index bd606a6..08ccf0e 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -594,6 +594,54 @@
     Assert.assertEquals(Math.ceil(-2.5), -2.0d, 0.0);
     Assert.assertEquals(Math.ceil(-2.9), -2.0d, 0.0);
     Assert.assertEquals(Math.ceil(-3.0), -3.0d, 0.0);
+    // 2^52 - 1.5
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x432FFFFFFFFFFFFDl)),
+                        Double.longBitsToDouble(0x432FFFFFFFFFFFFEl), 0.0);
+    // 2^52 - 0.5
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x432FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x4330000000000000l), 0.0);
+    // 2^52
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x4330000000000000l)),
+                        Double.longBitsToDouble(0x4330000000000000l), 0.0);
+    // 2^53 - 1
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x433FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x433FFFFFFFFFFFFFl), 0.0);
+    // 2^53
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x4340000000000000l)),
+                        Double.longBitsToDouble(0x4340000000000000l), 0.0);
+    // 2^63 - 2^10
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x43DFFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x43DFFFFFFFFFFFFFl), 0.0);
+    // 2^63
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x43E0000000000000l)),
+                        Double.longBitsToDouble(0x43E0000000000000l), 0.0);
+    // 2^64
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0x43F0000000000000l)),
+                        Double.longBitsToDouble(0x43F0000000000000l), 0.0);
+    // -(2^52 - 1.5)
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC32FFFFFFFFFFFFDl)),
+                        Double.longBitsToDouble(0xC32FFFFFFFFFFFFCl), 0.0);
+    // -(2^52 - 0.5)
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC32FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC32FFFFFFFFFFFFEl), 0.0);
+    // -2^52
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC330000000000000l)),
+                        Double.longBitsToDouble(0xC330000000000000l), 0.0);
+    // -(2^53 - 1)
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC33FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC33FFFFFFFFFFFFFl), 0.0);
+    // -2^53
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC340000000000000l)),
+                        Double.longBitsToDouble(0xC340000000000000l), 0.0);
+    // -(2^63 - 2^10)
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC3DFFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC3DFFFFFFFFFFFFFl), 0.0);
+    // -2^63
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC3E0000000000000l)),
+                        Double.longBitsToDouble(0xC3E0000000000000l), 0.0);
+    // -2^64
+    Assert.assertEquals(Math.ceil(Double.longBitsToDouble(0xC3F0000000000000l)),
+                        Double.longBitsToDouble(0xC3F0000000000000l), 0.0);
     Assert.assertEquals(Math.ceil(Double.NaN), Double.NaN, 0.0);
     Assert.assertEquals(Math.ceil(Double.POSITIVE_INFINITY), Double.POSITIVE_INFINITY, 0.0);
     Assert.assertEquals(Math.ceil(Double.NEGATIVE_INFINITY), Double.NEGATIVE_INFINITY, 0.0);
@@ -613,6 +661,54 @@
     Assert.assertEquals(Math.floor(-2.5), -3.0d, 0.0);
     Assert.assertEquals(Math.floor(-2.9), -3.0d, 0.0);
     Assert.assertEquals(Math.floor(-3.0), -3.0d, 0.0);
+    // 2^52 - 1.5
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x432FFFFFFFFFFFFDl)),
+                        Double.longBitsToDouble(0x432FFFFFFFFFFFFCl), 0.0);
+    // 2^52 - 0.5
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x432FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x432FFFFFFFFFFFFEl), 0.0);
+    // 2^52
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x4330000000000000l)),
+                        Double.longBitsToDouble(0x4330000000000000l), 0.0);
+    // 2^53 - 1
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x433FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x433FFFFFFFFFFFFFl), 0.0);
+    // 2^53
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x4340000000000000l)),
+                        Double.longBitsToDouble(0x4340000000000000l), 0.0);
+    // 2^63 - 2^10
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x43DFFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x43DFFFFFFFFFFFFFl), 0.0);
+    // 2^63
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x43E0000000000000l)),
+                        Double.longBitsToDouble(0x43E0000000000000l), 0.0);
+    // 2^64
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0x43F0000000000000l)),
+                        Double.longBitsToDouble(0x43F0000000000000l), 0.0);
+    // -(2^52 - 1.5)
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC32FFFFFFFFFFFFDl)),
+                        Double.longBitsToDouble(0xC32FFFFFFFFFFFFEl), 0.0);
+    // -(2^52 - 0.5)
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC32FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC330000000000000l), 0.0);
+    // -2^52
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC330000000000000l)),
+                        Double.longBitsToDouble(0xC330000000000000l), 0.0);
+    // -(2^53 - 1)
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC33FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC33FFFFFFFFFFFFFl), 0.0);
+    // -2^53
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC340000000000000l)),
+                        Double.longBitsToDouble(0xC340000000000000l), 0.0);
+    // -(2^63 - 2^10)
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC3DFFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC3DFFFFFFFFFFFFFl), 0.0);
+    // -2^63
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC3E0000000000000l)),
+                        Double.longBitsToDouble(0xC3E0000000000000l), 0.0);
+    // -2^64
+    Assert.assertEquals(Math.floor(Double.longBitsToDouble(0xC3F0000000000000l)),
+                        Double.longBitsToDouble(0xC3F0000000000000l), 0.0);
     Assert.assertEquals(Math.floor(Double.NaN), Double.NaN, 0.0);
     Assert.assertEquals(Math.floor(Double.POSITIVE_INFINITY), Double.POSITIVE_INFINITY, 0.0);
     Assert.assertEquals(Math.floor(Double.NEGATIVE_INFINITY), Double.NEGATIVE_INFINITY, 0.0);
@@ -632,6 +728,54 @@
     Assert.assertEquals(Math.rint(-2.5), -2.0d, 0.0);
     Assert.assertEquals(Math.rint(-2.9), -3.0d, 0.0);
     Assert.assertEquals(Math.rint(-3.0), -3.0d, 0.0);
+    // 2^52 - 1.5
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x432FFFFFFFFFFFFDl)),
+                        Double.longBitsToDouble(0x432FFFFFFFFFFFFCl), 0.0);
+    // 2^52 - 0.5
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x432FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x4330000000000000l), 0.0);
+    // 2^52
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x4330000000000000l)),
+                        Double.longBitsToDouble(0x4330000000000000l), 0.0);
+    // 2^53 - 1
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x433FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x433FFFFFFFFFFFFFl), 0.0);
+    // 2^53
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x4340000000000000l)),
+                        Double.longBitsToDouble(0x4340000000000000l), 0.0);
+    // 2^63 - 2^10
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x43DFFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0x43DFFFFFFFFFFFFFl), 0.0);
+    // 2^63
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x43E0000000000000l)),
+                        Double.longBitsToDouble(0x43E0000000000000l), 0.0);
+    // 2^64
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0x43F0000000000000l)),
+                        Double.longBitsToDouble(0x43F0000000000000l), 0.0);
+    // -(2^52 - 1.5)
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC32FFFFFFFFFFFFDl)),
+                        Double.longBitsToDouble(0xC32FFFFFFFFFFFFCl), 0.0);
+    // -(2^52 - 0.5)
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC32FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC330000000000000l), 0.0);
+    // -2^52
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC330000000000000l)),
+                        Double.longBitsToDouble(0xC330000000000000l), 0.0);
+    // -(2^53 - 1)
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC33FFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC33FFFFFFFFFFFFFl), 0.0);
+    // -2^53
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC340000000000000l)),
+                        Double.longBitsToDouble(0xC340000000000000l), 0.0);
+    // -(2^63 - 2^10)
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC3DFFFFFFFFFFFFFl)),
+                        Double.longBitsToDouble(0xC3DFFFFFFFFFFFFFl), 0.0);
+    // -2^63
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC3E0000000000000l)),
+                        Double.longBitsToDouble(0xC3E0000000000000l), 0.0);
+    // -2^64
+    Assert.assertEquals(Math.rint(Double.longBitsToDouble(0xC3F0000000000000l)),
+                        Double.longBitsToDouble(0xC3F0000000000000l), 0.0);
     Assert.assertEquals(Math.rint(Double.NaN), Double.NaN, 0.0);
     Assert.assertEquals(Math.rint(Double.POSITIVE_INFINITY), Double.POSITIVE_INFINITY, 0.0);
     Assert.assertEquals(Math.rint(Double.NEGATIVE_INFINITY), Double.NEGATIVE_INFINITY, 0.0);
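
The boundary cases added above follow from how double precision spaces its
values: in [2^51, 2^52) the gap between adjacent doubles is exactly 0.5, so
2^52 - 1.5 and 2^52 - 0.5 are the last values that still carry a fraction,
while from 2^52 upward every finite double is already an integer and ceil,
floor, and rint must return their argument unchanged; rint additionally
rounds exact halves to the even neighbour. A small standalone Java check of
those properties (an illustration, not part of the test):

    public class RoundingBoundaries {
        public static void main(String[] args) {
            double half = Double.longBitsToDouble(0x432FFFFFFFFFFFFFL);  // 2^52 - 0.5
            double pow52 = Double.longBitsToDouble(0x4330000000000000L); // 2^52

            // 2^52 - 0.5 is an exact half; rint picks the even neighbour,
            // which is 2^52 itself (2^52 - 1 is odd).
            System.out.println(Math.rint(half) == pow52);        // true
            System.out.println(Math.ceil(half) == pow52);        // true
            System.out.println(Math.floor(half) == pow52 - 1.0); // true

            // At 2^63 adjacent doubles are 2^11 apart, so the value is
            // integral and all three functions are the identity.
            double big = Double.longBitsToDouble(0x43E0000000000000L); // 2^63
            System.out.println(Math.ceil(big) == big);   // true
            System.out.println(Math.floor(big) == big);  // true
            System.out.println(Math.rint(big) == big);   // true
        }
    }
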
diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java
index af1eaea..53b72e9 100644
--- a/test/088-monitor-verification/src/Main.java
+++ b/test/088-monitor-verification/src/Main.java
@@ -26,6 +26,7 @@
      * Drives tests.
      */
     public static void main(String[] args) {
+        System.loadLibrary(args[0]);
         Main m = new Main();
 
         m.recursiveSync(0);
@@ -54,7 +55,6 @@
         m.triplet(obj1, obj2, 0);
         System.out.println("triplet ok");
 
-        System.loadLibrary("arttest");
         runSmaliTests();
     }
 
diff --git a/test/474-fp-sub-neg/expected.txt b/test/474-fp-sub-neg/expected.txt
index 1c15abb..1c7ded3 100644
--- a/test/474-fp-sub-neg/expected.txt
+++ b/test/474-fp-sub-neg/expected.txt
@@ -1,6 +1,13 @@
 -0.0
-0.0
-0.0
 -0.0
 0.0
 0.0
+0.0
+0.0
+-0.0
+-0.0
+0.0
+0.0
+0.0
+0.0
+d 0.0
diff --git a/test/474-fp-sub-neg/src/Main.java b/test/474-fp-sub-neg/src/Main.java
index c190e8e..796d56c 100644
--- a/test/474-fp-sub-neg/src/Main.java
+++ b/test/474-fp-sub-neg/src/Main.java
@@ -17,33 +17,58 @@
 public class Main {
     public static void floatTest() {
       float f = 0;
+      float nf = -0;
       float fc = 1f;
       for (int i = 0; i < 2; i++) {
         f -= fc;
         f = -f;
+        nf -= fc;
+        nf = -nf;
       }
 
       System.out.println(f);
+      System.out.println(nf);
       System.out.println(f + 0f);
       System.out.println(f - (-0f));
+      System.out.println(-f - (-nf));
+      System.out.println(-f + (-nf));
     }
 
     public static void doubleTest() {
       double d = 0;
+      double nd = -0;
       double dc = 1f;
       for (int i = 0; i < 2; i++) {
         d -= dc;
         d = -d;
+        nd -= dc;
+        nd = -nd;
       }
 
       System.out.println(d);
+      System.out.println(nd);
       System.out.println(d + 0f);
       System.out.println(d - (-0f));
+      System.out.println(-d - (-nd));
+      System.out.println(-d + (-nd));
+    }
+
+    public static void bug_1() {
+      int i4=18, i3=-48959;
+      float d;
+      float f=-0.0f;
+      float a=0.0f;
+
+      d = -f + (-a);
+      f += i4 * i3;
+
+      System.out.println("d " + d);
     }
 
     public static void main(String[] args) {
         doubleTest();
         floatTest();
+        bug_1();
     }
 
 }
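
Both the widened test and the bug_1 reproducer exercise IEEE 754 signed-zero
rules: negating a zero flips its sign bit, but adding zeros of opposite sign
yields +0.0 under the default rounding mode, so -f + (-a) with f == -0.0f and
a == 0.0f prints 0.0, matching the "d 0.0" line in expected.txt. A standalone
Java illustration of those identities (separate from the test itself):

    public class SignedZero {
        public static void main(String[] args) {
            float f = -0.0f;
            float a = 0.0f;

            System.out.println(-f);  // 0.0  (negation flips the sign bit)
            System.out.println(-a);  // -0.0

            // (+0.0) + (-0.0) is +0.0 in round-to-nearest, hence "d 0.0".
            System.out.println(-f + (-a));      // 0.0

            // Only when both operands are -0.0 does the sum stay negative.
            System.out.println(-0.0f + -0.0f);  // -0.0
        }
    }
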
diff --git a/test/dexdump/run-all-tests b/test/dexdump/run-all-tests
index d9f1e96..9cf7ab6 100755
--- a/test/dexdump/run-all-tests
+++ b/test/dexdump/run-all-tests
@@ -43,7 +43,7 @@
 DEXDFLAGS2="-l xml"
 
 # Set up dexlist binary and flags to test.
-DEXL="${ANDROID_HOST_OUT}/bin/dexlist2"
+DEXL="${ANDROID_HOST_OUT}/bin/dexlist"
 DEXLFLAGS=""
 
 # Run the tests.