Merge "Use the object class as top in reference type propagation"
diff --git a/Android.mk b/Android.mk
index c01464a..49b61bb 100644
--- a/Android.mk
+++ b/Android.mk
@@ -427,6 +427,7 @@
 	adb shell setprop dalvik.vm.dex2oat-filter \"\"
 	adb shell setprop dalvik.vm.image-dex2oat-filter \"\"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
+	adb shell setprop dalvik.vm.usejit false
 	adb shell start
 
 .PHONY: use-artd-full
@@ -437,16 +438,18 @@
 	adb shell setprop dalvik.vm.dex2oat-filter \"\"
 	adb shell setprop dalvik.vm.image-dex2oat-filter \"\"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libartd.so
+	adb shell setprop dalvik.vm.usejit false
 	adb shell start
 
-.PHONY: use-art-verify-at-runtime
-use-art-verify-at-runtime:
+.PHONY: use-art-jit
+use-art-jit:
 	adb root
 	adb wait-for-device shell stop
 	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-filter "verify-at-runtime"
 	adb shell setprop dalvik.vm.image-dex2oat-filter "verify-at-runtime"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
+	adb shell setprop dalvik.vm.usejit true
 	adb shell start
 
 .PHONY: use-art-interpret-only
@@ -457,6 +460,7 @@
 	adb shell setprop dalvik.vm.dex2oat-filter "interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-filter "interpret-only"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
+	adb shell setprop dalvik.vm.usejit false
 	adb shell start
 
 .PHONY: use-artd-interpret-only
@@ -467,6 +471,7 @@
 	adb shell setprop dalvik.vm.dex2oat-filter "interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-filter "interpret-only"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libartd.so
+	adb shell setprop dalvik.vm.usejit false
 	adb shell start
 
 .PHONY: use-art-verify-none
@@ -477,6 +482,7 @@
 	adb shell setprop dalvik.vm.dex2oat-filter "verify-none"
 	adb shell setprop dalvik.vm.image-dex2oat-filter "verify-none"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
+	adb shell setprop dalvik.vm.usejit false
 	adb shell start
 
 ########################################################################
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 0f756ef..6952d69 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -20,6 +20,15 @@
 ART_TARGET_SUPPORTED_ARCH := arm arm64 mips mips64 x86 x86_64
 ART_HOST_SUPPORTED_ARCH := x86 x86_64
 
+ifneq ($(HOST_OS),darwin)
+  ART_HOST_SUPPORTED_ARCH := x86 x86_64
+else
+  # Mac OS doesn't support low-4GB allocation in a 64-bit process, so we won't be able
+  # to create our heaps.
+  ART_HOST_SUPPORTED_ARCH := x86
+  ART_MULTILIB_OVERRIDE_host := 32
+endif
+
 ART_COVERAGE := false
 
 ifeq ($(ART_COVERAGE),true)
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 83dd690..5d4feb8 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -60,6 +60,11 @@
 $(info Enabling ART_BUILD_HOST_STATIC)
 endif
 
+ifeq ($(ART_TEST_DEBUG_GC),true)
+  ART_DEFAULT_GC_TYPE := SS
+  ART_USE_TLAB := true
+endif
+
 #
 # Used to enable JIT
 #
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index 45b6490..2f43f5f 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -45,6 +45,7 @@
 
 # Do you want interpreter tests run?
 ART_TEST_INTERPRETER ?= $(ART_TEST_FULL)
+ART_TEST_INTERPRETER_ACCESS_CHECKS ?= $(ART_TEST_FULL)
 
 # Do you want JIT tests run?
 ART_TEST_JIT ?= $(ART_TEST_FULL)
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index a251c92..72cf978 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -127,6 +127,10 @@
     LOCAL_MODULE_TARGET_ARCH := $(ART_SUPPORTED_ARCH)
   endif
 
+  ifdef ART_MULTILIB_OVERRIDE_$$(art_target_or_host)
+    art_multilib := $$(ART_MULTILIB_OVERRIDE_$$(art_target_or_host))
+  endif
+
   LOCAL_MULTILIB := $$(art_multilib)
   art_out_binary_name := $$(LOCAL_MODULE)
 
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 0958c64..377cd4e 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -264,6 +264,7 @@
   compiler/optimizing/parallel_move_test.cc \
   compiler/optimizing/pretty_printer_test.cc \
   compiler/optimizing/register_allocator_test.cc \
+  compiler/optimizing/side_effects_test.cc \
   compiler/optimizing/ssa_test.cc \
   compiler/optimizing/stack_map_test.cc \
   compiler/optimizing/suspend_check_test.cc \
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 728469c..c70e12d 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -31,7 +31,7 @@
 endif
 
 # Use dex2oat debug version for better error reporting
-# $(1): compiler - default, optimizing, jit or interpreter.
+# $(1): compiler - default, optimizing, jit, interpreter or interpreter-access-checks.
 # $(2): pic/no-pic
 # $(3): 2ND_ or undefined, 2ND_ for 32-bit host builds.
 # $(4): wrapper, e.g., valgrind.
@@ -64,12 +64,16 @@
     core_compile_options += --compiler-filter=interpret-only
     core_infix := -interpreter
   endif
+  ifeq ($(1),interpreter-access-checks)
+    core_compile_options += --compiler-filter=verify-at-runtime --runtime-arg -Xverify:softfail
+    core_infix := -interpreter-access-checks
+  endif
   ifeq ($(1),default)
     # Default has no infix, no compile options.
   endif
-  ifneq ($(filter-out default interpreter jit optimizing,$(1)),)
+  ifneq ($(filter-out default interpreter interpreter-access-checks jit optimizing,$(1)),)
     # Technically this test is not precise, but hopefully good enough.
-    $$(error found $(1) expected default, interpreter, jit or optimizing)
+    $$(error found $(1) expected default, interpreter, interpreter-access-checks, jit or optimizing)
   endif
 
   ifeq ($(2),pic)
@@ -127,7 +131,7 @@
   core_pic_infix :=
 endef  # create-core-oat-host-rules
 
-# $(1): compiler - default, optimizing, jit or interpreter.
+# $(1): compiler - default, optimizing, jit, interpreter or interpreter-access-checks.
 # $(2): wrapper.
 # $(3): dex2oat suffix.
 define create-core-oat-host-rule-combination
@@ -143,12 +147,14 @@
 $(eval $(call create-core-oat-host-rule-combination,default,,))
 $(eval $(call create-core-oat-host-rule-combination,optimizing,,))
 $(eval $(call create-core-oat-host-rule-combination,interpreter,,))
+$(eval $(call create-core-oat-host-rule-combination,interpreter-access-checks,,))
 
 valgrindHOST_CORE_IMG_OUTS :=
 valgrindHOST_CORE_OAT_OUTS :=
 $(eval $(call create-core-oat-host-rule-combination,default,valgrind,32))
 $(eval $(call create-core-oat-host-rule-combination,optimizing,valgrind,32))
 $(eval $(call create-core-oat-host-rule-combination,interpreter,valgrind,32))
+$(eval $(call create-core-oat-host-rule-combination,interpreter-access-checks,valgrind,32))
 
 valgrind-test-art-host-dex2oat-host: $(valgrindHOST_CORE_IMG_OUTS)
 
@@ -178,12 +184,16 @@
     core_compile_options += --compiler-filter=interpret-only
     core_infix := -interpreter
   endif
+  ifeq ($(1),interpreter-access-checks)
+    core_compile_options += --compiler-filter=verify-at-runtime --runtime-arg -Xverify:softfail
+    core_infix := -interpreter-access-checks
+  endif
   ifeq ($(1),default)
     # Default has no infix, no compile options.
   endif
-  ifneq ($(filter-out default interpreter jit optimizing,$(1)),)
+  ifneq ($(filter-out default interpreter interpreter-access-checks jit optimizing,$(1)),)
     # Technically this test is not precise, but hopefully good enough.
-    $$(error found $(1) expected default, interpreter, jit or optimizing)
+    $$(error found $(1) expected default, interpreter, interpreter-access-checks, jit or optimizing)
   endif
 
   ifeq ($(2),pic)
@@ -246,7 +256,7 @@
   core_pic_infix :=
 endef  # create-core-oat-target-rules
 
-# $(1): compiler - default, optimizing, jit or interpreter.
+# $(1): compiler - default, optimizing, jit, interpreter or interpreter-access-checks.
 # $(2): wrapper.
 # $(3): dex2oat suffix.
 define create-core-oat-target-rule-combination
@@ -262,12 +272,14 @@
 $(eval $(call create-core-oat-target-rule-combination,default,,))
 $(eval $(call create-core-oat-target-rule-combination,optimizing,,))
 $(eval $(call create-core-oat-target-rule-combination,interpreter,,))
+$(eval $(call create-core-oat-target-rule-combination,interpreter-access-checks,,))
 
 valgrindTARGET_CORE_IMG_OUTS :=
 valgrindTARGET_CORE_OAT_OUTS :=
 $(eval $(call create-core-oat-target-rule-combination,default,valgrind,32))
 $(eval $(call create-core-oat-target-rule-combination,optimizing,valgrind,32))
 $(eval $(call create-core-oat-target-rule-combination,interpreter,valgrind,32))
+$(eval $(call create-core-oat-target-rule-combination,interpreter-access-checks,valgrind,32))
 
 valgrind-test-art-host-dex2oat-target: $(valgrindTARGET_CORE_IMG_OUTS)
 
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 98fd327..52df7de 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -216,9 +216,6 @@
   EXPECT_SINGLE_PARSE_EXISTS("-Xzygote", M::Zygote);
   EXPECT_SINGLE_PARSE_VALUE_STR("/hello/world", "-Xbootclasspath:/hello/world", M::BootClassPath);
   EXPECT_SINGLE_PARSE_VALUE("/hello/world", "-Xbootclasspath:/hello/world", M::BootClassPath);
-  EXPECT_SINGLE_PARSE_VALUE(false, "-Xverify:none", M::Verify);
-  EXPECT_SINGLE_PARSE_VALUE(true, "-Xverify:remote", M::Verify);
-  EXPECT_SINGLE_PARSE_VALUE(true, "-Xverify:all", M::Verify);
   EXPECT_SINGLE_PARSE_VALUE(Memory<1>(234), "-Xss234", M::StackSize);
   EXPECT_SINGLE_PARSE_VALUE(MemoryKiB(1234*MB), "-Xms1234m", M::MemoryInitialSize);
   EXPECT_SINGLE_PARSE_VALUE(true, "-XX:EnableHSpaceCompactForOOM", M::EnableHSpaceCompactForOOM);
@@ -550,6 +547,14 @@
                             M::ExperimentalLambdas);
 }
 
+// -Xverify:_
+TEST_F(CmdlineParserTest, TestVerify) {
+  EXPECT_SINGLE_PARSE_VALUE(verifier::VerifyMode::kNone,     "-Xverify:none",     M::Verify);
+  EXPECT_SINGLE_PARSE_VALUE(verifier::VerifyMode::kEnable,   "-Xverify:remote",   M::Verify);
+  EXPECT_SINGLE_PARSE_VALUE(verifier::VerifyMode::kEnable,   "-Xverify:all",      M::Verify);
+  EXPECT_SINGLE_PARSE_VALUE(verifier::VerifyMode::kSoftFail, "-Xverify:softfail", M::Verify);
+}
+
 TEST_F(CmdlineParserTest, TestIgnoreUnrecognized) {
   RuntimeParser::Builder parserBuilder;
 
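
For context: these new cases assume the -Xverify option now parses into a three-valued verifier mode rather than the old boolean. A minimal sketch of such an enum, with names taken from the test and semantics inferred from the interpreter-access-checks configurations above (the real definition would live with the runtime's verifier):

    namespace art {
    namespace verifier {

    // Sketch only: verification mode selected by -Xverify:<value>.
    enum class VerifyMode {
      kNone,      // -Xverify:none       -- skip verification entirely.
      kEnable,    // -Xverify:all|remote -- normal verification (the default).
      kSoftFail,  // -Xverify:softfail   -- presumably force a soft failure so
                  //                        the interpreter runs with access checks.
    };

    }  // namespace verifier
    }  // namespace art
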
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index d1acada..74ef35e 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -23,20 +23,12 @@
                            const ArrayRef<const uint8_t>& quick_code, bool owns_code_array)
     : compiler_driver_(compiler_driver), instruction_set_(instruction_set),
       owns_code_array_(owns_code_array), quick_code_(nullptr) {
-  SetCode(&quick_code);
-}
-
-void CompiledCode::SetCode(const ArrayRef<const uint8_t>* quick_code) {
-  if (quick_code != nullptr) {
-    CHECK(!quick_code->empty());
-    if (owns_code_array_) {
-      // If we are supposed to own the code, don't deduplicate it.
-      CHECK(quick_code_ == nullptr);
-      quick_code_ = new SwapVector<uint8_t>(quick_code->begin(), quick_code->end(),
-                                            compiler_driver_->GetSwapSpaceAllocator());
-    } else {
-      quick_code_ = compiler_driver_->DeduplicateCode(*quick_code);
-    }
+  if (owns_code_array_) {
+    // If we are supposed to own the code, don't deduplicate it.
+    quick_code_ = new SwapVector<uint8_t>(quick_code.begin(), quick_code.end(),
+                                          compiler_driver_->GetSwapSpaceAllocator());
+  } else {
+    quick_code_ = compiler_driver_->DeduplicateCode(quick_code);
   }
 }
 
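
With SetCode gone, the constructor is the only place code is attached, and it no longer rejects empty code; that is what lets the dex-to-dex path below hand in an empty ArrayRef. A hypothetical construction of such a code-less object (argument names illustrative):

    // Hypothetical: quick code may now be empty (the dex-to-dex case). The
    // constructor still materializes it, either as an owned SwapVector or via
    // the driver's deduplication.
    ArrayRef<const uint8_t> no_code;  // empty: no executable code
    CompiledCode compiled(driver, instruction_set, no_code, /* owns_code_array */ false);
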
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 45a62bc..a4d2387 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -47,8 +47,6 @@
     return quick_code_;
   }
 
-  void SetCode(const ArrayRef<const uint8_t>* quick_code);
-
   bool operator==(const CompiledCode& rhs) const;
 
   // To align an offset from a page-aligned value to make it suitable
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index bd59046..4b56b69 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -18,6 +18,7 @@
 #include "art_method-inl.h"
 #include "base/logging.h"
 #include "base/mutex.h"
+#include "compiled_method.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
 #include "driver/compiler_driver.h"
@@ -34,6 +35,13 @@
 // Control check-cast elision.
 const bool kEnableCheckCastEllision = true;
 
+struct QuickenedInfo {
+  QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {}
+
+  uint32_t dex_pc;
+  uint16_t dex_member_index;
+};
+
 class DexCompiler {
  public:
   DexCompiler(art::CompilerDriver& compiler,
@@ -47,6 +55,10 @@
 
   void Compile();
 
+  const std::vector<QuickenedInfo>& GetQuickenedInfo() const {
+    return quickened_info_;
+  }
+
  private:
   const DexFile& GetDexFile() const {
     return *unit_.GetDexFile();
@@ -87,6 +99,11 @@
   const DexCompilationUnit& unit_;
   const DexToDexCompilationLevel dex_to_dex_compilation_level_;
 
+  // Filled by the compiler when quickening, in order to encode that information
+  // in the .oat file. The runtime will use it to recover the original opcodes.
+  std::vector<QuickenedInfo> quickened_info_;
+
   DISALLOW_COPY_AND_ASSIGN(DexCompiler);
 };
 
@@ -248,6 +265,7 @@
     inst->SetOpcode(new_opcode);
     // Replace field index by field offset.
     inst->SetVRegC_22c(static_cast<uint16_t>(field_offset.Int32Value()));
+    quickened_info_.push_back(QuickenedInfo(dex_pc, field_idx));
   }
 }
 
@@ -287,24 +305,60 @@
       } else {
         inst->SetVRegB_35c(static_cast<uint16_t>(vtable_idx));
       }
+      quickened_info_.push_back(QuickenedInfo(dex_pc, method_idx));
     }
   }
 }
 
-}  // namespace optimizer
-}  // namespace art
-
-extern "C" void ArtCompileDEX(art::CompilerDriver& driver, const art::DexFile::CodeItem* code_item,
-                              uint32_t access_flags, art::InvokeType invoke_type,
-                              uint16_t class_def_idx, uint32_t method_idx, jobject class_loader,
-                              const art::DexFile& dex_file,
-                              art::DexToDexCompilationLevel dex_to_dex_compilation_level) {
-  UNUSED(invoke_type);
+extern "C" CompiledMethod* ArtCompileDEX(
+    art::CompilerDriver& driver,
+    const art::DexFile::CodeItem* code_item,
+    uint32_t access_flags,
+    art::InvokeType invoke_type ATTRIBUTE_UNUSED,
+    uint16_t class_def_idx,
+    uint32_t method_idx,
+    jobject class_loader,
+    const art::DexFile& dex_file,
+    art::DexToDexCompilationLevel dex_to_dex_compilation_level) {
   if (dex_to_dex_compilation_level != art::kDontDexToDexCompile) {
     art::DexCompilationUnit unit(nullptr, class_loader, art::Runtime::Current()->GetClassLinker(),
                                  dex_file, code_item, class_def_idx, method_idx, access_flags,
                                  driver.GetVerifiedMethod(&dex_file, method_idx));
     art::optimizer::DexCompiler dex_compiler(driver, unit, dex_to_dex_compilation_level);
     dex_compiler.Compile();
+    if (dex_compiler.GetQuickenedInfo().empty()) {
+      // No need to create a CompiledMethod if there are no quickened opcodes.
+      return nullptr;
+    }
+
+    // Create a `CompiledMethod`, with the quickened information in the vmap table.
+    Leb128EncodingVector builder;
+    for (QuickenedInfo info : dex_compiler.GetQuickenedInfo()) {
+      builder.PushBackUnsigned(info.dex_pc);
+      builder.PushBackUnsigned(info.dex_member_index);
+    }
+    InstructionSet instruction_set = driver.GetInstructionSet();
+    if (instruction_set == kThumb2) {
+      // Don't use the thumb2 instruction set to avoid the off-by-one code delta.
+      instruction_set = kArm;
+    }
+    return CompiledMethod::SwapAllocCompiledMethod(
+        &driver,
+        instruction_set,
+        ArrayRef<const uint8_t>(),                   // no code
+        0,
+        0,
+        0,
+        nullptr,                                     // src_mapping_table
+        ArrayRef<const uint8_t>(),                   // mapping_table
+        ArrayRef<const uint8_t>(builder.GetData()),  // vmap_table
+        ArrayRef<const uint8_t>(),                   // gc_map
+        ArrayRef<const uint8_t>(),                   // cfi data
+        ArrayRef<const LinkerPatch>());
   }
+  return nullptr;
 }
+
+}  // namespace optimizer
+
+}  // namespace art
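
The vmap table built above is a flat stream of unsigned-LEB128 (dex_pc, dex_member_index) pairs, one per quickened instruction, in code order. A minimal decoding sketch using art's DecodeUnsignedLeb128 from base/leb128.h (the bounds pointers here are hypothetical):

    #include "leb128.h"

    void WalkQuickenedInfo(const uint8_t* data, const uint8_t* end) {
      while (data < end) {
        // Where the instruction was rewritten, and the original member index.
        uint32_t dex_pc = art::DecodeUnsignedLeb128(&data);
        uint32_t dex_member_index = art::DecodeUnsignedLeb128(&data);
        // The runtime maps the quickened opcode at dex_pc back to the original
        // field or method via dex_member_index.
      }
    }
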
diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc
index b1f5d87..044989e 100644
--- a/compiler/dex/gvn_dead_code_elimination.cc
+++ b/compiler/dex/gvn_dead_code_elimination.cc
@@ -1192,7 +1192,6 @@
     case Instruction::CONST_WIDE_32:
     case Instruction::CONST_WIDE:
     case Instruction::CONST_WIDE_HIGH16:
-    case Instruction::ARRAY_LENGTH:
     case Instruction::CMPL_FLOAT:
     case Instruction::CMPG_FLOAT:
     case Instruction::CMPL_DOUBLE:
@@ -1316,6 +1315,13 @@
       }
       break;
 
+    case Instruction::ARRAY_LENGTH:
+      if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0) {
+        must_keep = true;
+        uses_all_vregs = true;
+      }
+      break;
+
     case Instruction::AGET_OBJECT:
     case Instruction::AGET:
     case Instruction::AGET_WIDE:
diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc
index 461c844..6ba91b6 100644
--- a/compiler/dex/gvn_dead_code_elimination_test.cc
+++ b/compiler/dex/gvn_dead_code_elimination_test.cc
@@ -2066,4 +2066,31 @@
   }
 }
 
+TEST_F(GvnDeadCodeEliminationTestSimple, ArrayLengthThrows) {
+  static const MIRDef mirs[] = {
+      DEF_CONST(3, Instruction::CONST, 0u, 0),              // null
+      DEF_UNOP(3, Instruction::ARRAY_LENGTH, 1u, 0u),       // null.length
+      DEF_CONST(3, Instruction::CONST, 2u, 1000u),          // Overwrite the array-length dest.
+  };
+
+  static const int32_t sreg_to_vreg_map[] = { 0, 1, 1 };
+  PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+  PrepareMIRs(mirs);
+  PerformGVN_DCE();
+
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  static const size_t diff_indexes[] = { 0, 1, 2 };
+  ExpectValueNamesNE(diff_indexes);
+
+  static const bool eliminated[] = {
+      false, false, false,
+  };
+  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+  for (size_t i = 0; i != arraysize(eliminated); ++i) {
+    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+  }
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index df4a9f2..5f911db 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -1298,7 +1298,7 @@
              */
             delta &= ~0x3;
           }
-          DCHECK_EQ((delta & 0x3), 0);
+          DCHECK_ALIGNED(delta, 4);
           // First, a sanity check for cases we shouldn't see now
           if (kIsDebugBuild && (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) ||
               ((lir->opcode == kThumbLdrPcRel) && (delta > 1020)))) {
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 2ef92f8..062f7af 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -880,7 +880,7 @@
 LIR* ArmMir2Lir::LoadStoreUsingInsnWithOffsetImm8Shl2(ArmOpcode opcode, RegStorage r_base,
                                                       int displacement, RegStorage r_src_dest,
                                                       RegStorage r_work) {
-  DCHECK_EQ(displacement & 3, 0);
+  DCHECK_ALIGNED(displacement, 4);
   constexpr int kOffsetMask = 0xff << 2;
   int encoded_disp = (displacement & kOffsetMask) >> 2;  // Within range of the instruction.
   RegStorage r_ptr = r_base;
@@ -942,7 +942,7 @@
         already_generated = true;
         break;
       }
-      DCHECK_EQ((displacement & 0x3), 0);
+      DCHECK_ALIGNED(displacement, 4);
       scale = 2;
       if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) &&
           (displacement >= 0)) {
@@ -959,14 +959,14 @@
       }
       break;
     case kUnsignedHalf:
-      DCHECK_EQ((displacement & 0x1), 0);
+      DCHECK_ALIGNED(displacement, 2);
       scale = 1;
       short_form = all_low && (displacement >> (5 + scale)) == 0;
       opcode16 = kThumbLdrhRRI5;
       opcode32 = kThumb2LdrhRRI12;
       break;
     case kSignedHalf:
-      DCHECK_EQ((displacement & 0x1), 0);
+      DCHECK_ALIGNED(displacement, 2);
       scale = 1;
       DCHECK_EQ(opcode16, kThumbBkpt);  // Not available.
       opcode32 = kThumb2LdrshRRI12;
@@ -1096,7 +1096,7 @@
         already_generated = true;
         break;
       }
-      DCHECK_EQ((displacement & 0x3), 0);
+      DCHECK_ALIGNED(displacement, 4);
       scale = 2;
       if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) {
         short_form = true;
@@ -1109,7 +1109,7 @@
       break;
     case kUnsignedHalf:
     case kSignedHalf:
-      DCHECK_EQ((displacement & 0x1), 0);
+      DCHECK_ALIGNED(displacement, 2);
       scale = 1;
       short_form = all_low && (displacement >> (5 + scale)) == 0;
       opcode16 = kThumbStrhRRI5;
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index b78fb80..25c69d1 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -909,7 +909,7 @@
           CodeOffset target = target_lir->offset +
               ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
           int32_t delta = target - pc;
-          DCHECK_EQ(delta & 0x3, 0);
+          DCHECK_ALIGNED(delta, 4);
           if (!IS_SIGNED_IMM26(delta >> 2)) {
             LOG(FATAL) << "Invalid jump range in kFixupT1Branch";
           }
@@ -933,7 +933,7 @@
           CodeOffset target = target_lir->offset +
             ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
           int32_t delta = target - pc;
-          DCHECK_EQ(delta & 0x3, 0);
+          DCHECK_ALIGNED(delta, 4);
           if (!IS_SIGNED_IMM19(delta >> 2)) {
             LOG(FATAL) << "Invalid jump range in kFixupLoad";
           }
@@ -965,7 +965,7 @@
           CodeOffset target = target_lir->offset +
               ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
           int32_t delta = target - pc;
-          DCHECK_EQ(delta & 0x3, 0);
+          DCHECK_ALIGNED(delta, 4);
           // Check if branch offset can be encoded in tbz/tbnz.
           if (!IS_SIGNED_IMM14(delta >> 2)) {
             DexOffset dalvik_offset = lir->dalvik_offset;
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 37e5804..ec2475a 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -714,7 +714,7 @@
         } else {
           opcode = kMipsFldc1;
         }
-        DCHECK_EQ((displacement & 0x3), 0);
+        DCHECK_ALIGNED(displacement, 4);
         break;
       }
       is64bit = true;
@@ -736,15 +736,15 @@
           DCHECK(r_dest.IsDouble());
         }
       }
-      DCHECK_EQ((displacement & 0x3), 0);
+      DCHECK_ALIGNED(displacement, 4);
       break;
     case kUnsignedHalf:
       opcode = kMipsLhu;
-      DCHECK_EQ((displacement & 0x1), 0);
+      DCHECK_ALIGNED(displacement, 2);
       break;
     case kSignedHalf:
       opcode = kMipsLh;
-      DCHECK_EQ((displacement & 0x1), 0);
+      DCHECK_ALIGNED(displacement, 2);
       break;
     case kUnsignedByte:
       opcode = kMipsLbu;
@@ -891,7 +891,7 @@
         } else {
           opcode = kMipsFsdc1;
         }
-        DCHECK_EQ((displacement & 0x3), 0);
+        DCHECK_ALIGNED(displacement, 4);
         break;
       }
       is64bit = true;
@@ -913,12 +913,12 @@
           DCHECK(r_src.IsDouble());
         }
       }
-      DCHECK_EQ((displacement & 0x3), 0);
+      DCHECK_ALIGNED(displacement, 4);
       break;
     case kUnsignedHalf:
     case kSignedHalf:
       opcode = kMipsSh;
-      DCHECK_EQ((displacement & 0x1), 0);
+      DCHECK_ALIGNED(displacement, 2);
       break;
     case kUnsignedByte:
     case kSignedByte:
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 61a1bec..b16ae98 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -659,7 +659,7 @@
         opcode = is_array ? kX86Mov32RA  : kX86Mov32RM;
       }
       // TODO: double store is to unaligned address
-      DCHECK_EQ((displacement & 0x3), 0);
+      DCHECK_ALIGNED(displacement, 4);
       break;
     case kWord:
       if (cu_->target64) {
@@ -677,15 +677,15 @@
         opcode = is_array ? kX86MovssRA : kX86MovssRM;
         DCHECK(r_dest.IsFloat());
       }
-      DCHECK_EQ((displacement & 0x3), 0);
+      DCHECK_ALIGNED(displacement, 4);
       break;
     case kUnsignedHalf:
       opcode = is_array ? kX86Movzx16RA : kX86Movzx16RM;
-      DCHECK_EQ((displacement & 0x1), 0);
+      DCHECK_ALIGNED(displacement, 2);
       break;
     case kSignedHalf:
       opcode = is_array ? kX86Movsx16RA : kX86Movsx16RM;
-      DCHECK_EQ((displacement & 0x1), 0);
+      DCHECK_ALIGNED(displacement, 2);
       break;
     case kUnsignedByte:
       opcode = is_array ? kX86Movzx8RA : kX86Movzx8RM;
@@ -812,7 +812,7 @@
         opcode = is_array ? kX86Mov32AR  : kX86Mov32MR;
       }
       // TODO: double store is to unaligned address
-      DCHECK_EQ((displacement & 0x3), 0);
+      DCHECK_ALIGNED(displacement, 4);
       break;
     case kWord:
       if (cu_->target64) {
@@ -831,13 +831,13 @@
         opcode = is_array ? kX86MovssAR : kX86MovssMR;
         DCHECK(r_src.IsSingle());
       }
-      DCHECK_EQ((displacement & 0x3), 0);
+      DCHECK_ALIGNED(displacement, 4);
       consider_non_temporal = true;
       break;
     case kUnsignedHalf:
     case kSignedHalf:
       opcode = is_array ? kX86Mov16AR : kX86Mov16MR;
-      DCHECK_EQ((displacement & 0x1), 0);
+      DCHECK_ALIGNED(displacement, 2);
       break;
     case kUnsignedByte:
     case kSignedByte:
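
The DCHECK_ALIGNED conversions in this and the preceding utility files are mechanical: the macro states the same power-of-two alignment property as the masked comparison it replaces, with more readable failure output. A sketch of its shape (assumed; the real definition lives in art's base headers):

    // Sketch, assuming a power-of-two alignment.
    template <int n, typename T>
    constexpr bool IsAligned(T x) {
      static_assert((n & (n - 1)) == 0, "n is not a power of two");
      return (x & (n - 1)) == 0;
    }
    #define DCHECK_ALIGNED(value, alignment) DCHECK(IsAligned<alignment>(value))
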
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 7890108..a52bfae 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -2291,10 +2291,16 @@
       // TODO: add a command-line option to disable DEX-to-DEX compilation ?
       // Do not optimize if a VerifiedMethod is missing. SafeCast elision, for example, relies on
       // it.
-      (*dex_to_dex_compiler_)(*this, code_item, access_flags,
-                              invoke_type, class_def_idx,
-                              method_idx, class_loader, dex_file,
-                              has_verified_method ? dex_to_dex_compilation_level : kRequired);
+      compiled_method = (*dex_to_dex_compiler_)(
+          *this,
+          code_item,
+          access_flags,
+          invoke_type,
+          class_def_idx,
+          method_idx,
+          class_loader,
+          dex_file,
+          has_verified_method ? dex_to_dex_compilation_level : kRequired);
     }
   }
   if (kTimeCompileMethod) {
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 2d7ceae..5cf4044 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -675,12 +675,13 @@
   typedef void (*CompilerCallbackFn)(CompilerDriver& driver);
   typedef MutexLock* (*CompilerMutexLockFn)(CompilerDriver& driver);
 
-  typedef void (*DexToDexCompilerFn)(CompilerDriver& driver,
-                                     const DexFile::CodeItem* code_item,
-                                     uint32_t access_flags, InvokeType invoke_type,
-                                     uint32_t class_dex_idx, uint32_t method_idx,
-                                     jobject class_loader, const DexFile& dex_file,
-                                     DexToDexCompilationLevel dex_to_dex_compilation_level);
+  typedef CompiledMethod* (*DexToDexCompilerFn)(
+      CompilerDriver& driver,
+      const DexFile::CodeItem* code_item,
+      uint32_t access_flags, InvokeType invoke_type,
+      uint32_t class_dex_idx, uint32_t method_idx,
+      jobject class_loader, const DexFile& dex_file,
+      DexToDexCompilationLevel dex_to_dex_compilation_level);
   DexToDexCompilerFn dex_to_dex_compiler_;
 
   void* compiler_context_;
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index c68bbc0..c10ffeb 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -249,16 +249,16 @@
   // Find all addresses (low_pc) which contain deduped methods.
   // The first instance of a method is not marked deduped_, but the rest are.
   std::unordered_set<uint32_t> deduped_addresses;
-  for (auto it = method_infos.begin(); it != method_infos.end(); ++it) {
-    if (it->deduped_) {
-      deduped_addresses.insert(it->low_pc_);
+  for (const OatWriter::DebugInfo& mi : method_infos) {
+    if (mi.deduped_) {
+      deduped_addresses.insert(mi.low_pc_);
     }
   }
 
   // Group the methods into compilation units based on source file.
   std::vector<std::vector<const OatWriter::DebugInfo*>> compilation_units;
   const char* last_source_file = nullptr;
-  for (const auto& mi : method_infos) {
+  for (const OatWriter::DebugInfo& mi : method_infos) {
     // Attribute given instruction range only to single method.
     // Otherwise the debugger might get really confused.
     if (!mi.deduped_) {
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index fdfeb48..2b65aa9 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -715,8 +715,10 @@
       DCHECK_EQ(obj, obj->AsString()->Intern());
       return;
     }
-    mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrong(
-        obj->AsString()->Intern());
+    // InternImageString allows us to intern while holding the heap bitmap lock. This is safe since
+    // we are guaranteed to not have GC during image writing.
+    mirror::String* const interned = Runtime::Current()->GetInternTable()->InternImageString(
+        obj->AsString());
     if (obj != interned) {
       if (!IsImageBinSlotAssigned(interned)) {
         // interned obj is after us, allocate its location early
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 754fe84..1523383 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -199,7 +199,7 @@
   const uint8_t* GetOatAddress(uint32_t offset) const {
     // With Quick, code is within the OatFile, as they are all in one
     // .o ELF object.
-    DCHECK_LT(offset, oat_file_->Size());
+    DCHECK_LE(offset, oat_file_->Size());
     DCHECK(oat_data_begin_ != nullptr);
     return offset == 0u ? nullptr : oat_data_begin_ + offset;
   }
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index a057a4c..13f67e6 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -50,7 +50,7 @@
 
     // We want to put method3 at a very precise offset.
     const uint32_t method3_offset = method1_offset + distance_without_thunks;
-    CHECK(IsAligned<kArmAlignment>(method3_offset - sizeof(OatQuickMethodHeader)));
+    CHECK_ALIGNED(method3_offset - sizeof(OatQuickMethodHeader), kArmAlignment);
 
     // Calculate size of method2 so that we put method3 at the correct place.
     const uint32_t method2_offset =
@@ -242,8 +242,10 @@
   };
 
   constexpr uint32_t max_positive_disp = 16 * MB - 2u + 4u /* PC adjustment */;
-  bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches,
-                                            kNopCode, ArrayRef<const LinkerPatch>(),
+  bool thunk_in_gap = Create2MethodsWithGap(method1_code,
+                                            ArrayRef<const LinkerPatch>(method1_patches),
+                                            kNopCode,
+                                            ArrayRef<const LinkerPatch>(),
                                             bl_offset_in_method1 + max_positive_disp);
   ASSERT_FALSE(thunk_in_gap);  // There should be no thunk.
 
@@ -262,8 +264,10 @@
   };
 
   constexpr uint32_t just_over_max_negative_disp = 16 * MB - 4u /* PC adjustment */;
-  bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(),
-                                            method3_code, method3_patches,
+  bool thunk_in_gap = Create2MethodsWithGap(kNopCode,
+                                            ArrayRef<const LinkerPatch>(),
+                                            method3_code,
+                                            ArrayRef<const LinkerPatch>(method3_patches),
                                             just_over_max_negative_disp - bl_offset_in_method3);
   ASSERT_FALSE(thunk_in_gap);  // There should be no thunk.
 
@@ -282,8 +286,10 @@
   };
 
   constexpr uint32_t just_over_max_positive_disp = 16 * MB + 4u /* PC adjustment */;
-  bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches,
-                                            kNopCode, ArrayRef<const LinkerPatch>(),
+  bool thunk_in_gap = Create2MethodsWithGap(method1_code,
+                                            ArrayRef<const LinkerPatch>(method1_patches),
+                                            kNopCode,
+                                            ArrayRef<const LinkerPatch>(),
                                             bl_offset_in_method1 + just_over_max_positive_disp);
   ASSERT_TRUE(thunk_in_gap);
 
@@ -311,8 +317,10 @@
   };
 
   constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */;
-  bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(),
-                                            method3_code, method3_patches,
+  bool thunk_in_gap = Create2MethodsWithGap(kNopCode,
+                                            ArrayRef<const LinkerPatch>(),
+                                            method3_code,
+                                            ArrayRef<const LinkerPatch>(method3_patches),
                                             just_over_max_negative_disp - bl_offset_in_method3);
   ASSERT_FALSE(thunk_in_gap);  // There should be a thunk but it should be after the method2.
 
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 29355d6..6b9c530 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -108,7 +108,7 @@
     if (!current_method_thunks_.empty()) {
       uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kArm64);
       if (kIsDebugBuild) {
-        CHECK(IsAligned<kAdrpThunkSize>(current_method_thunks_.size()));
+        CHECK_ALIGNED(current_method_thunks_.size(), kAdrpThunkSize);
         size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize;
         CHECK_LE(num_thunks, processed_adrp_thunks_);
         for (size_t i = 0u; i != num_thunks; ++i) {
@@ -203,7 +203,7 @@
       if ((adrp & 0x9f000000u) != 0x90000000u) {
         CHECK(fix_cortex_a53_843419_);
         CHECK_EQ(adrp & 0xfc000000u, 0x14000000u);  // B <thunk>
-        CHECK(IsAligned<kAdrpThunkSize>(current_method_thunks_.size()));
+        CHECK_ALIGNED(current_method_thunks_.size(), kAdrpThunkSize);
         size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize;
         CHECK_LE(num_thunks, processed_adrp_thunks_);
         uint32_t b_offset = patch_offset - literal_offset + pc_insn_offset;
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index 21f9367..b3af4c6 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -66,7 +66,7 @@
     // We want to put method3 at a very precise offset.
     const uint32_t last_method_offset = method1_offset + distance_without_thunks;
     const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader);
-    CHECK(IsAligned<kArm64Alignment>(gap_end));
+    CHECK_ALIGNED(gap_end, kArm64Alignment);
 
     // Fill the gap with intermediate methods in chunks of 2MiB and the last in [2MiB, 4MiB).
     // (This allows deduplicating the small chunks to avoid using 256MiB of memory for +-128MiB
@@ -396,8 +396,10 @@
   };
 
   constexpr uint32_t max_positive_disp = 128 * MB - 4u;
-  uint32_t last_method_idx = Create2MethodsWithGap(method1_code, method1_patches,
-                                                   kNopCode, ArrayRef<const LinkerPatch>(),
+  uint32_t last_method_idx = Create2MethodsWithGap(method1_code,
+                                                   ArrayRef<const LinkerPatch>(method1_patches),
+                                                   kNopCode,
+                                                   ArrayRef<const LinkerPatch>(),
                                                    bl_offset_in_method1 + max_positive_disp);
   ASSERT_EQ(expected_last_method_idx, last_method_idx);
 
@@ -420,8 +422,10 @@
   };
 
   constexpr uint32_t max_negative_disp = 128 * MB;
-  uint32_t last_method_idx = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(),
-                                                   last_method_code, last_method_patches,
+  uint32_t last_method_idx = Create2MethodsWithGap(kNopCode,
+                                                   ArrayRef<const LinkerPatch>(),
+                                                   last_method_code,
+                                                   ArrayRef<const LinkerPatch>(last_method_patches),
                                                    max_negative_disp - bl_offset_in_last_method);
   uint32_t method1_offset = GetMethodOffset(1u);
   uint32_t last_method_offset = GetMethodOffset(last_method_idx);
@@ -445,7 +449,10 @@
 
   constexpr uint32_t just_over_max_positive_disp = 128 * MB;
   uint32_t last_method_idx = Create2MethodsWithGap(
-      method1_code, method1_patches, kNopCode, ArrayRef<const LinkerPatch>(),
+      method1_code,
+      ArrayRef<const LinkerPatch>(method1_patches),
+      kNopCode,
+      ArrayRef<const LinkerPatch>(),
       bl_offset_in_method1 + just_over_max_positive_disp);
   ASSERT_EQ(expected_last_method_idx, last_method_idx);
 
@@ -474,7 +481,8 @@
 
   constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4;
   uint32_t last_method_idx = Create2MethodsWithGap(
-      kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, last_method_patches,
+      kNopCode, ArrayRef<const LinkerPatch>(), last_method_code,
+      ArrayRef<const LinkerPatch>(last_method_patches),
       just_over_max_negative_disp - bl_offset_in_last_method);
   uint32_t method1_offset = GetMethodOffset(1u);
   uint32_t last_method_offset = GetMethodOffset(last_method_idx);
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index a98a304..4318ea5 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -374,9 +374,7 @@
       uint32_t quick_code_offset = 0;
 
       const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode();
-      CHECK(quick_code != nullptr);
       uint32_t code_size = quick_code->size() * sizeof(uint8_t);
-      CHECK_NE(code_size, 0U);
       uint32_t thumb_offset = compiled_method->CodeDelta();
 
       // Deduplicate code arrays if we are not producing debuggable code.
@@ -394,16 +392,18 @@
         }
       }
 
-      MethodReference method_ref(dex_file_, it.GetMemberIndex());
-      auto method_lb = writer_->method_offset_map_.map.lower_bound(method_ref);
-      if (method_lb != writer_->method_offset_map_.map.end() &&
-          !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) {
-        // TODO: Should this be a hard failure?
-        LOG(WARNING) << "Multiple definitions of "
-            << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file)
-            << ((method_lb->second != quick_code_offset) ? "; OFFSET MISMATCH" : "");
-      } else {
-        writer_->method_offset_map_.map.PutBefore(method_lb, method_ref, quick_code_offset);
+      if (code_size != 0) {
+        MethodReference method_ref(dex_file_, it.GetMemberIndex());
+        auto method_lb = writer_->method_offset_map_.map.lower_bound(method_ref);
+        if (method_lb != writer_->method_offset_map_.map.end() &&
+            !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) {
+          // TODO: Should this be a hard failure?
+          LOG(WARNING) << "Multiple definitions of "
+              << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file)
+              << ((method_lb->second != quick_code_offset) ? "; OFFSET MISMATCH" : "");
+        } else {
+          writer_->method_offset_map_.map.PutBefore(method_lb, method_ref, quick_code_offset);
+        }
       }
 
       // Update quick method header.
@@ -411,21 +411,24 @@
       OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_];
       uint32_t mapping_table_offset = method_header->mapping_table_offset_;
       uint32_t vmap_table_offset = method_header->vmap_table_offset_;
+      // If we don't have quick code, then we must have a vmap, as that is how the dex2dex
+      // compiler records its transformations.
+      DCHECK(quick_code != nullptr || vmap_table_offset != 0);
       uint32_t gc_map_offset = method_header->gc_map_offset_;
       // The code offset was 0 when the mapping/vmap table offset was set, so it's set
       // to 0-offset and we need to adjust it by code_offset.
       uint32_t code_offset = quick_code_offset - thumb_offset;
-      if (mapping_table_offset != 0u) {
+      if (mapping_table_offset != 0u && code_offset != 0u) {
         mapping_table_offset += code_offset;
-        DCHECK_LT(mapping_table_offset, code_offset);
+        DCHECK_LT(mapping_table_offset, code_offset) << "Overflow in oat offsets";
       }
-      if (vmap_table_offset != 0u) {
+      if (vmap_table_offset != 0u && code_offset != 0u) {
         vmap_table_offset += code_offset;
-        DCHECK_LT(vmap_table_offset, code_offset);
+        DCHECK_LT(vmap_table_offset, code_offset) << "Overflow in oat offsets";
       }
-      if (gc_map_offset != 0u) {
+      if (gc_map_offset != 0u && code_offset != 0u) {
         gc_map_offset += code_offset;
-        DCHECK_LT(gc_map_offset, code_offset);
+        DCHECK_LT(gc_map_offset, code_offset) << "Overflow in oat offsets";
       }
       uint32_t frame_size_in_bytes = compiled_method->GetFrameSizeInBytes();
       uint32_t core_spill_mask = compiled_method->GetCoreSpillMask();
@@ -534,7 +537,7 @@
                               const ClassDataItemIterator& it,
                               uint32_t thumb_offset) {
     offset_ = writer_->relative_patcher_->ReserveSpace(
-              offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex()));
+        offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex()));
     offset_ = compiled_method->AlignCode(offset_);
     DCHECK_ALIGNED_PARAM(offset_,
                          GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
@@ -619,15 +622,19 @@
         *dex_file_, it.GetMemberIndex(), dex_cache, NullHandle<mirror::ClassLoader>(), nullptr,
         invoke_type);
     if (method == nullptr) {
-      LOG(ERROR) << "Unexpected failure to resolve a method: "
-                 << PrettyMethod(it.GetMemberIndex(), *dex_file_, true);
+      LOG(INTERNAL_FATAL) << "Unexpected failure to resolve a method: "
+                          << PrettyMethod(it.GetMemberIndex(), *dex_file_, true);
       soa.Self()->AssertPendingException();
       mirror::Throwable* exc = soa.Self()->GetException();
       std::string dump = exc->Dump();
       LOG(FATAL) << dump;
+      UNREACHABLE();
     }
-    method->SetEntryPointFromQuickCompiledCodePtrSize(reinterpret_cast<void*>(offsets.code_offset_),
-                                                      pointer_size_);
+
+    if (compiled_method != nullptr && compiled_method->GetQuickCode()->size() != 0) {
+      method->SetEntryPointFromQuickCompiledCodePtrSize(
+          reinterpret_cast<void*>(offsets.code_offset_), pointer_size_);
+    }
 
     return true;
   }
@@ -689,85 +696,82 @@
       OutputStream* out = out_;
 
       const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode();
-      if (quick_code != nullptr) {
-        // Need a wrapper if we create a copy for patching.
-        ArrayRef<const uint8_t> wrapped(*quick_code);
-        uint32_t code_size = quick_code->size() * sizeof(uint8_t);
-        CHECK_NE(code_size, 0U);
+      // Need a wrapper if we create a copy for patching.
+      ArrayRef<const uint8_t> wrapped(*quick_code);
+      uint32_t code_size = quick_code->size() * sizeof(uint8_t);
 
-        // Deduplicate code arrays.
-        const OatMethodOffsets& method_offsets = oat_class->method_offsets_[method_offsets_index_];
-        if (method_offsets.code_offset_ >= offset_) {
-          offset_ = writer_->relative_patcher_->WriteThunks(out, offset_);
-          if (offset_ == 0u) {
-            ReportWriteFailure("relative call thunk", it);
-            return false;
-          }
-          uint32_t aligned_offset = compiled_method->AlignCode(offset_);
-          uint32_t aligned_code_delta = aligned_offset - offset_;
-          if (aligned_code_delta != 0) {
-            if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) {
-              ReportWriteFailure("code alignment padding", it);
-              return false;
-            }
-            offset_ += aligned_code_delta;
-            DCHECK_OFFSET_();
-          }
-          DCHECK_ALIGNED_PARAM(offset_,
-                               GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
-          DCHECK_EQ(method_offsets.code_offset_,
-                    offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta())
-              << PrettyMethod(it.GetMemberIndex(), *dex_file_);
-          const OatQuickMethodHeader& method_header =
-              oat_class->method_headers_[method_offsets_index_];
-          writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header));
-          if (!out->WriteFully(&method_header, sizeof(method_header))) {
-            ReportWriteFailure("method header", it);
-            return false;
-          }
-          writer_->size_method_header_ += sizeof(method_header);
-          offset_ += sizeof(method_header);
-          DCHECK_OFFSET_();
-
-          if (!compiled_method->GetPatches().empty()) {
-            patched_code_.assign(quick_code->begin(), quick_code->end());
-            wrapped = ArrayRef<const uint8_t>(patched_code_);
-            for (const LinkerPatch& patch : compiled_method->GetPatches()) {
-              if (patch.Type() == kLinkerPatchCallRelative) {
-                // NOTE: Relative calls across oat files are not supported.
-                uint32_t target_offset = GetTargetOffset(patch);
-                uint32_t literal_offset = patch.LiteralOffset();
-                writer_->relative_patcher_->PatchCall(&patched_code_, literal_offset,
-                                                       offset_ + literal_offset, target_offset);
-              } else if (patch.Type() == kLinkerPatchDexCacheArray) {
-                uint32_t target_offset = GetDexCacheOffset(patch);
-                uint32_t literal_offset = patch.LiteralOffset();
-                writer_->relative_patcher_->PatchDexCacheReference(&patched_code_, patch,
-                                                                   offset_ + literal_offset,
-                                                                   target_offset);
-              } else if (patch.Type() == kLinkerPatchCall) {
-                uint32_t target_offset = GetTargetOffset(patch);
-                PatchCodeAddress(&patched_code_, patch.LiteralOffset(), target_offset);
-              } else if (patch.Type() == kLinkerPatchMethod) {
-                ArtMethod* method = GetTargetMethod(patch);
-                PatchMethodAddress(&patched_code_, patch.LiteralOffset(), method);
-              } else if (patch.Type() == kLinkerPatchType) {
-                mirror::Class* type = GetTargetType(patch);
-                PatchObjectAddress(&patched_code_, patch.LiteralOffset(), type);
-              }
-            }
-          }
-
-          writer_->oat_header_->UpdateChecksum(wrapped.data(), code_size);
-          if (!out->WriteFully(wrapped.data(), code_size)) {
-            ReportWriteFailure("method code", it);
-            return false;
-          }
-          writer_->size_code_ += code_size;
-          offset_ += code_size;
+      // Deduplicate code arrays.
+      const OatMethodOffsets& method_offsets = oat_class->method_offsets_[method_offsets_index_];
+      if (method_offsets.code_offset_ > offset_) {
+        offset_ = writer_->relative_patcher_->WriteThunks(out, offset_);
+        if (offset_ == 0u) {
+          ReportWriteFailure("relative call thunk", it);
+          return false;
         }
+        uint32_t aligned_offset = compiled_method->AlignCode(offset_);
+        uint32_t aligned_code_delta = aligned_offset - offset_;
+        if (aligned_code_delta != 0) {
+          if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) {
+            ReportWriteFailure("code alignment padding", it);
+            return false;
+          }
+          offset_ += aligned_code_delta;
+          DCHECK_OFFSET_();
+        }
+        DCHECK_ALIGNED_PARAM(offset_,
+                             GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
+        DCHECK_EQ(method_offsets.code_offset_,
+                  offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta())
+            << PrettyMethod(it.GetMemberIndex(), *dex_file_);
+        const OatQuickMethodHeader& method_header =
+            oat_class->method_headers_[method_offsets_index_];
+        writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header));
+        if (!out->WriteFully(&method_header, sizeof(method_header))) {
+          ReportWriteFailure("method header", it);
+          return false;
+        }
+        writer_->size_method_header_ += sizeof(method_header);
+        offset_ += sizeof(method_header);
         DCHECK_OFFSET_();
+
+        if (!compiled_method->GetPatches().empty()) {
+          patched_code_.assign(quick_code->begin(), quick_code->end());
+          wrapped = ArrayRef<const uint8_t>(patched_code_);
+          for (const LinkerPatch& patch : compiled_method->GetPatches()) {
+            if (patch.Type() == kLinkerPatchCallRelative) {
+              // NOTE: Relative calls across oat files are not supported.
+              uint32_t target_offset = GetTargetOffset(patch);
+              uint32_t literal_offset = patch.LiteralOffset();
+              writer_->relative_patcher_->PatchCall(&patched_code_, literal_offset,
+                                                     offset_ + literal_offset, target_offset);
+            } else if (patch.Type() == kLinkerPatchDexCacheArray) {
+              uint32_t target_offset = GetDexCacheOffset(patch);
+              uint32_t literal_offset = patch.LiteralOffset();
+              writer_->relative_patcher_->PatchDexCacheReference(&patched_code_, patch,
+                                                                 offset_ + literal_offset,
+                                                                 target_offset);
+            } else if (patch.Type() == kLinkerPatchCall) {
+              uint32_t target_offset = GetTargetOffset(patch);
+              PatchCodeAddress(&patched_code_, patch.LiteralOffset(), target_offset);
+            } else if (patch.Type() == kLinkerPatchMethod) {
+              ArtMethod* method = GetTargetMethod(patch);
+              PatchMethodAddress(&patched_code_, patch.LiteralOffset(), method);
+            } else if (patch.Type() == kLinkerPatchType) {
+              mirror::Class* type = GetTargetType(patch);
+              PatchObjectAddress(&patched_code_, patch.LiteralOffset(), type);
+            }
+          }
+        }
+
+        writer_->oat_header_->UpdateChecksum(wrapped.data(), code_size);
+        if (!out->WriteFully(wrapped.data(), code_size)) {
+          ReportWriteFailure("method code", it);
+          return false;
+        }
+        writer_->size_code_ += code_size;
+        offset_ += code_size;
       }
+      DCHECK_OFFSET_();
       ++method_offsets_index_;
     }
 
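
On the new code_offset != 0u guards above: for a method that only went through the dex-to-dex compiler there is no executable code, so quick_code_offset (and with it code_offset) presumably stays at its initial zero, and the 0-relative table offsets must be left untouched. Schematically:

    // Sketch of the guarded rebase (names as in the patch).
    uint32_t code_offset = quick_code_offset - thumb_offset;
    if (vmap_table_offset != 0u && code_offset != 0u) {
      // The offset was recorded while the code offset was still zero; adjust it now.
      vmap_table_offset += code_offset;
      DCHECK_LT(vmap_table_offset, code_offset) << "Overflow in oat offsets";
    }
    // code_offset == 0u: a quickened-only method; the offset is already final.
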
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index 329112a..84201c3 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -154,11 +154,6 @@
   // entry block. Any following blocks would have had the join block
   // as a dominator, and `MergeWith` handles changing that to the
   // entry block.
-
-  // Remove the original condition if it is now unused.
-  if (!if_condition->HasUses()) {
-    if_condition->GetBlock()->RemoveInstructionOrPhi(if_condition);
-  }
 }
 
 void HBooleanSimplifier::Run() {
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 1319f2c..52a3a15 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -804,7 +804,9 @@
       invoke_type = kDirect;
       break;
     case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_QUICK:
     case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
       invoke_type = kVirtual;
       break;
     case Instruction::INVOKE_INTERFACE:
@@ -1051,7 +1053,15 @@
                                              bool is_put) {
   uint32_t source_or_dest_reg = instruction.VRegA_22c();
   uint32_t obj_reg = instruction.VRegB_22c();
-  uint16_t field_index = instruction.VRegC_22c();
+  uint16_t field_index;
+  if (instruction.IsQuickened()) {
+    if (!CanDecodeQuickenedInfo()) {
+      return false;
+    }
+    field_index = LookupQuickenedInfo(dex_pc);
+  } else {
+    field_index = instruction.VRegC_22c();
+  }
 
   ScopedObjectAccess soa(Thread::Current());
   ArtField* resolved_field =
@@ -1560,6 +1570,17 @@
   }
 }
 
+bool HGraphBuilder::CanDecodeQuickenedInfo() const {
+  return interpreter_metadata_ != nullptr;
+}
+
+uint16_t HGraphBuilder::LookupQuickenedInfo(uint32_t dex_pc) {
+  DCHECK(interpreter_metadata_ != nullptr);
+  uint32_t dex_pc_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
+  DCHECK_EQ(dex_pc, dex_pc_in_map);
+  return DecodeUnsignedLeb128(&interpreter_metadata_);
+}
+
 bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc) {
   if (current_block_ == nullptr) {
     return true;  // Dead code
@@ -1657,6 +1678,7 @@
       break;
     }
 
+    case Instruction::RETURN_VOID_NO_BARRIER:
     case Instruction::RETURN_VOID: {
       BuildReturn(instruction, Primitive::kPrimVoid);
       break;
@@ -1705,8 +1727,17 @@
     case Instruction::INVOKE_INTERFACE:
     case Instruction::INVOKE_STATIC:
     case Instruction::INVOKE_SUPER:
-    case Instruction::INVOKE_VIRTUAL: {
-      uint32_t method_idx = instruction.VRegB_35c();
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_QUICK: {
+      uint16_t method_idx;
+      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_QUICK) {
+        if (!CanDecodeQuickenedInfo()) {
+          return false;
+        }
+        method_idx = LookupQuickenedInfo(dex_pc);
+      } else {
+        method_idx = instruction.VRegB_35c();
+      }
       uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
       uint32_t args[5];
       instruction.GetVarArgs(args);
@@ -1721,8 +1752,17 @@
     case Instruction::INVOKE_INTERFACE_RANGE:
     case Instruction::INVOKE_STATIC_RANGE:
     case Instruction::INVOKE_SUPER_RANGE:
-    case Instruction::INVOKE_VIRTUAL_RANGE: {
-      uint32_t method_idx = instruction.VRegB_3rc();
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
+      uint16_t method_idx;
+      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK) {
+        if (!CanDecodeQuickenedInfo()) {
+          return false;
+        }
+        method_idx = LookupQuickenedInfo(dex_pc);
+      } else {
+        method_idx = instruction.VRegB_3rc();
+      }
       uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
       uint32_t register_index = instruction.VRegC();
       if (!BuildInvoke(instruction, dex_pc, method_idx,
@@ -2375,12 +2415,19 @@
       break;
 
     case Instruction::IGET:
+    case Instruction::IGET_QUICK:
     case Instruction::IGET_WIDE:
+    case Instruction::IGET_WIDE_QUICK:
     case Instruction::IGET_OBJECT:
+    case Instruction::IGET_OBJECT_QUICK:
     case Instruction::IGET_BOOLEAN:
+    case Instruction::IGET_BOOLEAN_QUICK:
     case Instruction::IGET_BYTE:
+    case Instruction::IGET_BYTE_QUICK:
     case Instruction::IGET_CHAR:
-    case Instruction::IGET_SHORT: {
+    case Instruction::IGET_CHAR_QUICK:
+    case Instruction::IGET_SHORT:
+    case Instruction::IGET_SHORT_QUICK: {
       if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) {
         return false;
       }
@@ -2388,12 +2435,19 @@
     }
 
     case Instruction::IPUT:
+    case Instruction::IPUT_QUICK:
     case Instruction::IPUT_WIDE:
+    case Instruction::IPUT_WIDE_QUICK:
     case Instruction::IPUT_OBJECT:
+    case Instruction::IPUT_OBJECT_QUICK:
     case Instruction::IPUT_BOOLEAN:
+    case Instruction::IPUT_BOOLEAN_QUICK:
     case Instruction::IPUT_BYTE:
+    case Instruction::IPUT_BYTE_QUICK:
     case Instruction::IPUT_CHAR:
-    case Instruction::IPUT_SHORT: {
+    case Instruction::IPUT_CHAR_QUICK:
+    case Instruction::IPUT_SHORT:
+    case Instruction::IPUT_SHORT_QUICK: {
       if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) {
         return false;
       }
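
For context: the interpreter metadata consumed by LookupQuickenedInfo() above is a flat
stream of (dex_pc, index) pairs, each value unsigned-LEB128 encoded and consumed strictly
in dex_pc order. A minimal standalone sketch of that decoding, assuming a local helper
rather than ART's leb128.h:

#include <cstdint>

// Decode one unsigned LEB128 value and advance the cursor past it.
static uint32_t DecodeUleb128(const uint8_t** data) {
  uint32_t result = 0;
  int shift = 0;
  uint8_t byte;
  do {
    byte = *(*data)++;
    result |= static_cast<uint32_t>(byte & 0x7f) << shift;
    shift += 7;
  } while ((byte & 0x80) != 0);
  return result;
}

// Each lookup consumes exactly one (dex_pc, index) pair, so quickened
// instructions must be visited in the order the metadata was emitted;
// the mismatch branch below stands in for the DCHECK_EQ in the builder.
static uint16_t LookupQuickenedIndex(const uint8_t** metadata, uint32_t expected_dex_pc) {
  const uint32_t dex_pc_in_map = DecodeUleb128(metadata);
  if (dex_pc_in_map != expected_dex_pc) {
    return 0;  // illustrative only; the real code asserts instead
  }
  return static_cast<uint16_t>(DecodeUleb128(metadata));
}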
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 76610f5..ad5d923 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -39,7 +39,8 @@
                 const DexCompilationUnit* const outer_compilation_unit,
                 const DexFile* dex_file,
                 CompilerDriver* driver,
-                OptimizingCompilerStats* compiler_stats)
+                OptimizingCompilerStats* compiler_stats,
+                const uint8_t* interpreter_metadata)
       : arena_(graph->GetArena()),
         branch_targets_(graph->GetArena(), 0),
         locals_(graph->GetArena(), 0),
@@ -55,7 +56,8 @@
         code_start_(nullptr),
         latest_result_(nullptr),
         can_use_baseline_for_string_init_(true),
-        compilation_stats_(compiler_stats) {}
+        compilation_stats_(compiler_stats),
+        interpreter_metadata_(interpreter_metadata) {}
 
   // Only for unit testing.
   HGraphBuilder(HGraph* graph, Primitive::Type return_type = Primitive::kPrimInt)
@@ -120,6 +122,9 @@
                             const DexFile::CodeItem& code_item,
                             const DexFile::TryItem& try_item);
 
+  bool CanDecodeQuickenedInfo() const;
+  uint16_t LookupQuickenedInfo(uint32_t dex_pc);
+
   void InitializeLocals(uint16_t count);
   HLocal* GetLocalAt(int register_index) const;
   void UpdateLocal(int register_index, HInstruction* instruction) const;
@@ -307,6 +312,8 @@
 
   OptimizingCompilerStats* compilation_stats_;
 
+  const uint8_t* interpreter_metadata_;
+
   DISALLOW_COPY_AND_ASSIGN(HGraphBuilder);
 };
 
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 5de629d..6269d16 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -128,7 +128,7 @@
     for (i.Advance(); !i.Done(); i.Advance()) {
       HInstruction* inst = i.Current();
       DCHECK(!inst->IsControlFlow());
-      if (!inst->HasSideEffects()
+      if (!inst->DoesAnyWrite()
           && !inst->CanThrow()
           && !inst->IsSuspendCheck()
           // If we added an explicit barrier then we should keep it.
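
The switch from HasSideEffects() to DoesAnyWrite() narrows what pins a dead instruction:
an unused instruction whose only effect is a memory read is now removable. A sketch of
the resulting predicate (simplified; the real loop also keeps explicit barriers, as the
context above shows):

// Sketch: removable iff nothing observes the instruction and it neither
// writes memory nor affects control, suspension, or exception state.
static bool IsRemovableByDce(const HInstruction* inst) {
  return !inst->HasUses()       // no consumer of the produced value
      && !inst->DoesAnyWrite()  // pure reads no longer pin the instruction
      && !inst->CanThrow()
      && !inst->IsSuspendCheck();
}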
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 9679d0a..cfebb77 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -136,6 +136,33 @@
   VisitInstruction(check);
 }
 
+void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) {
+  // Ensure that all exception handlers are catch blocks and that handlers
+  // are not listed multiple times.
+  // Note that a normal-flow successor may be a catch block before CFG
+  // simplification. We only test normal-flow successors in SSAChecker.
+  for (HExceptionHandlerIterator it(*try_boundary); !it.Done(); it.Advance()) {
+    HBasicBlock* handler = it.Current();
+    if (!handler->IsCatchBlock()) {
+      AddError(StringPrintf("Block %d with %s:%d has exceptional successor %d which "
+                            "is not a catch block.",
+                            current_block_->GetBlockId(),
+                            try_boundary->DebugName(),
+                            try_boundary->GetId(),
+                            handler->GetBlockId()));
+    }
+    if (current_block_->GetSuccessors().Contains(
+            handler, /* start_from */ it.CurrentSuccessorIndex() + 1)) {
+      AddError(StringPrintf("Exception handler block %d of %s:%d is listed multiple times.",
+                            handler->GetBlockId(),
+                            try_boundary->DebugName(),
+                            try_boundary->GetId()));
+    }
+  }
+
+  VisitInstruction(try_boundary);
+}
+
 void GraphChecker::VisitInstruction(HInstruction* instruction) {
   if (seen_ids_.IsBitSet(instruction->GetId())) {
     AddError(StringPrintf("Instruction id %d is duplicate in graph.",
@@ -301,11 +328,32 @@
 void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
   super_type::VisitBasicBlock(block);
 
+  // Ensure that catch blocks are not normal successors, and normal blocks are
+  // never exceptional successors.
+  const size_t num_normal_successors = block->NumberOfNormalSuccessors();
+  for (size_t j = 0; j < num_normal_successors; ++j) {
+    HBasicBlock* successor = block->GetSuccessors().Get(j);
+    if (successor->IsCatchBlock()) {
+      AddError(StringPrintf("Catch block %d is a normal successor of block %d.",
+                            successor->GetBlockId(),
+                            block->GetBlockId()));
+    }
+  }
+  for (size_t j = num_normal_successors, e = block->GetSuccessors().Size(); j < e; ++j) {
+    HBasicBlock* successor = block->GetSuccessors().Get(j);
+    if (!successor->IsCatchBlock()) {
+      AddError(StringPrintf("Normal block %d is an exceptional successor of block %d.",
+                            successor->GetBlockId(),
+                            block->GetBlockId()));
+    }
+  }
+
   // Ensure there is no critical edge (i.e., an edge connecting a
   // block with multiple successors to a block with multiple
-  // predecessors).
-  if (block->GetSuccessors().Size() > 1) {
-    for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
+  // predecessors). Exceptional edges are synthesized and hence
+  // not accounted for.
+  if (block->NumberOfNormalSuccessors() > 1) {
+    for (size_t j = 0, e = block->NumberOfNormalSuccessors(); j < e; ++j) {
       HBasicBlock* successor = block->GetSuccessors().Get(j);
       if (successor->GetPredecessors().Size() > 1) {
         AddError(StringPrintf("Critical edge between blocks %d and %d.",
@@ -326,6 +374,54 @@
     }
   }
 
+  // Ensure try membership information is consistent.
+  HTryBoundary* try_entry = block->GetTryEntry();
+  if (block->IsCatchBlock()) {
+    if (try_entry != nullptr) {
+      AddError(StringPrintf("Catch blocks should not be try blocks but catch block %d "
+                            "has try entry %s:%d.",
+                            block->GetBlockId(),
+                            try_entry->DebugName(),
+                            try_entry->GetId()));
+    }
+
+    if (block->IsLoopHeader()) {
+      AddError(StringPrintf("Catch blocks should not be loop headers but catch block %d is.",
+                            block->GetBlockId()));
+    }
+  } else {
+    for (size_t i = 0; i < block->GetPredecessors().Size(); ++i) {
+      HBasicBlock* predecessor = block->GetPredecessors().Get(i);
+      HTryBoundary* incoming_try_entry = predecessor->ComputeTryEntryOfSuccessors();
+      if (try_entry == nullptr) {
+        if (incoming_try_entry != nullptr) {
+          AddError(StringPrintf("Block %d has no try entry but try entry %s:%d follows "
+                                "from predecessor %d.",
+                                block->GetBlockId(),
+                                incoming_try_entry->DebugName(),
+                                incoming_try_entry->GetId(),
+                                predecessor->GetBlockId()));
+        }
+      } else if (incoming_try_entry == nullptr) {
+        AddError(StringPrintf("Block %d has try entry %s:%d but no try entry follows "
+                              "from predecessor %d.",
+                              block->GetBlockId(),
+                              try_entry->DebugName(),
+                              try_entry->GetId(),
+                              predecessor->GetBlockId()));
+      } else if (!incoming_try_entry->HasSameExceptionHandlersAs(*try_entry)) {
+        AddError(StringPrintf("Block %d has try entry %s:%d which is not consistent "
+                              "with %s:%d that follows from predecessor %d.",
+                              block->GetBlockId(),
+                              try_entry->DebugName(),
+                              try_entry->GetId(),
+                              incoming_try_entry->DebugName(),
+                              incoming_try_entry->GetId(),
+                              predecessor->GetBlockId()));
+      }
+    }
+  }
+
   if (block->IsLoopHeader()) {
     CheckLoop(block);
   }
@@ -472,32 +568,6 @@
                           phi->GetBlock()->GetBlockId()));
   }
 
-  // Ensure the number of inputs of a phi is the same as the number of
-  // its predecessors.
-  const GrowableArray<HBasicBlock*>& predecessors =
-    phi->GetBlock()->GetPredecessors();
-  if (phi->InputCount() != predecessors.Size()) {
-    AddError(StringPrintf(
-        "Phi %d in block %d has %zu inputs, "
-        "but block %d has %zu predecessors.",
-        phi->GetId(), phi->GetBlock()->GetBlockId(), phi->InputCount(),
-        phi->GetBlock()->GetBlockId(), predecessors.Size()));
-  } else {
-    // Ensure phi input at index I either comes from the Ith
-    // predecessor or from a block that dominates this predecessor.
-    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-      HInstruction* input = phi->InputAt(i);
-      HBasicBlock* predecessor = predecessors.Get(i);
-      if (!(input->GetBlock() == predecessor
-            || input->GetBlock()->Dominates(predecessor))) {
-        AddError(StringPrintf(
-            "Input %d at index %zu of phi %d from block %d is not defined in "
-            "predecessor number %zu nor in a block dominating it.",
-            input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(),
-            i));
-      }
-    }
-  }
   // Ensure that the inputs have the same primitive kind as the phi.
   for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
     HInstruction* input = phi->InputAt(i);
@@ -516,6 +586,38 @@
                           phi->GetBlock()->GetBlockId(),
                           Primitive::PrettyDescriptor(phi->GetType())));
   }
+
+  if (phi->IsCatchPhi()) {
+    // The number of inputs of a catch phi corresponds to the total number of
+    // throwing instructions caught by this catch block.
+  } else {
+    // Ensure the number of inputs of a non-catch phi is the same as the number
+    // of its predecessors.
+    const GrowableArray<HBasicBlock*>& predecessors =
+      phi->GetBlock()->GetPredecessors();
+    if (phi->InputCount() != predecessors.Size()) {
+      AddError(StringPrintf(
+          "Phi %d in block %d has %zu inputs, "
+          "but block %d has %zu predecessors.",
+          phi->GetId(), phi->GetBlock()->GetBlockId(), phi->InputCount(),
+          phi->GetBlock()->GetBlockId(), predecessors.Size()));
+    } else {
+      // Ensure phi input at index I either comes from the Ith
+      // predecessor or from a block that dominates this predecessor.
+      for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+        HInstruction* input = phi->InputAt(i);
+        HBasicBlock* predecessor = predecessors.Get(i);
+        if (!(input->GetBlock() == predecessor
+              || input->GetBlock()->Dominates(predecessor))) {
+          AddError(StringPrintf(
+              "Input %d at index %zu of phi %d from block %d is not defined in "
+              "predecessor number %zu nor in a block dominating it.",
+              input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(),
+              i));
+        }
+      }
+    }
+  }
 }
 
 void SSAChecker::HandleBooleanInput(HInstruction* instruction, size_t input_index) {
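
Concretely, the catch-phi exemption above is needed because a catch phi collects one
input per throwing instruction covered by the try, not one per predecessor: a try region
with three throwing instructions reaching the same catch block yields catch phis with
three inputs, regardless of how many TryBoundary predecessors the catch block has.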
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 7c72e23..0e270db 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -48,6 +48,9 @@
   // Check that the HasBoundsChecks() flag is set for bounds checks.
   void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
 
+  // Check successors of blocks ending in TryBoundary.
+  void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
+
   // Check that HCheckCast and HInstanceOf have HLoadClass as second input.
   void VisitCheckCast(HCheckCast* check) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* check) OVERRIDE;
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index bc9631b..d6b5636 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -158,12 +158,14 @@
                           std::ostream& output,
                           const char* pass_name,
                           bool is_after_pass,
+                          bool graph_in_bad_state,
                           const CodeGenerator& codegen,
                           const DisassemblyInformation* disasm_info = nullptr)
       : HGraphDelegateVisitor(graph),
         output_(output),
         pass_name_(pass_name),
         is_after_pass_(is_after_pass),
+        graph_in_bad_state_(graph_in_bad_state),
         codegen_(codegen),
         disasm_info_(disasm_info),
         disassembler_(disasm_info_ != nullptr
@@ -251,11 +253,9 @@
   void PrintSuccessors(HBasicBlock* block) {
     AddIndent();
     output_ << "successors";
-    for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) {
-      if (!block->IsExceptionalSuccessor(i)) {
-        HBasicBlock* successor = block->GetSuccessors().Get(i);
-        output_ << " \"B" << successor->GetBlockId() << "\" ";
-      }
+    for (size_t i = 0; i < block->NumberOfNormalSuccessors(); ++i) {
+      HBasicBlock* successor = block->GetSuccessors().Get(i);
+      output_ << " \"B" << successor->GetBlockId() << "\" ";
     }
     output_<< std::endl;
   }
@@ -263,11 +263,9 @@
   void PrintExceptionHandlers(HBasicBlock* block) {
     AddIndent();
     output_ << "xhandlers";
-    for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) {
-      if (block->IsExceptionalSuccessor(i)) {
-        HBasicBlock* handler = block->GetSuccessors().Get(i);
-        output_ << " \"B" << handler->GetBlockId() << "\" ";
-      }
+    for (size_t i = block->NumberOfNormalSuccessors(); i < block->GetSuccessors().Size(); ++i) {
+      HBasicBlock* handler = block->GetSuccessors().Get(i);
+      output_ << " \"B" << handler->GetBlockId() << "\" ";
     }
     if (block->IsExitBlock() &&
         (disasm_info_ != nullptr) &&
@@ -351,6 +349,7 @@
 
   void VisitPhi(HPhi* phi) OVERRIDE {
     StartAttributeStream("reg") << phi->GetRegNumber();
+    StartAttributeStream("is_catch_phi") << std::boolalpha << phi->IsCatchPhi() << std::noboolalpha;
   }
 
   void VisitMemoryBarrier(HMemoryBarrier* barrier) OVERRIDE {
@@ -581,7 +580,11 @@
 
   void Run() {
     StartTag("cfg");
-    std::string pass_desc = std::string(pass_name_) + (is_after_pass_ ? " (after)" : " (before)");
+    std::string pass_desc = std::string(pass_name_)
+                          + " ("
+                          + (is_after_pass_ ? "after" : "before")
+                          + (graph_in_bad_state_ ? ", bad_state" : "")
+                          + ")";
     PrintProperty("name", pass_desc.c_str());
     if (disasm_info_ != nullptr) {
       DumpDisassemblyBlockForFrameEntry();
@@ -650,6 +653,7 @@
   std::ostream& output_;
   const char* pass_name_;
   const bool is_after_pass_;
+  const bool graph_in_bad_state_;
   const CodeGenerator& codegen_;
   const DisassemblyInformation* disasm_info_;
   std::unique_ptr<HGraphVisualizerDisassembler> disassembler_;
@@ -665,7 +669,7 @@
 
 void HGraphVisualizer::PrintHeader(const char* method_name) const {
   DCHECK(output_ != nullptr);
-  HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_);
+  HGraphVisualizerPrinter printer(graph_, *output_, "", true, false, codegen_);
   printer.StartTag("compilation");
   printer.PrintProperty("name", method_name);
   printer.PrintProperty("method", method_name);
@@ -673,10 +677,17 @@
   printer.EndTag("compilation");
 }
 
-void HGraphVisualizer::DumpGraph(const char* pass_name, bool is_after_pass) const {
+void HGraphVisualizer::DumpGraph(const char* pass_name,
+                                 bool is_after_pass,
+                                 bool graph_in_bad_state) const {
   DCHECK(output_ != nullptr);
   if (!graph_->GetBlocks().IsEmpty()) {
-    HGraphVisualizerPrinter printer(graph_, *output_, pass_name, is_after_pass, codegen_);
+    HGraphVisualizerPrinter printer(graph_,
+                                    *output_,
+                                    pass_name,
+                                    is_after_pass,
+                                    graph_in_bad_state,
+                                    codegen_);
     printer.Run();
   }
 }
@@ -684,8 +695,13 @@
 void HGraphVisualizer::DumpGraphWithDisassembly() const {
   DCHECK(output_ != nullptr);
   if (!graph_->GetBlocks().IsEmpty()) {
-    HGraphVisualizerPrinter printer(
-        graph_, *output_, "disassembly", true, codegen_, codegen_.GetDisassemblyInformation());
+    HGraphVisualizerPrinter printer(graph_,
+                                    *output_,
+                                    "disassembly",
+                                    /* is_after_pass */ true,
+                                    /* graph_in_bad_state */ false,
+                                    codegen_,
+                                    codegen_.GetDisassemblyInformation());
     printer.Run();
   }
 }
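
With the new graph_in_bad_state_ flag, the c1visualizer pass header shows graph validity
at a glance. For a hypothetical pass named "ssa_builder", the emitted names are now one
of:

  ssa_builder (before)
  ssa_builder (after)
  ssa_builder (before, bad_state)
  ssa_builder (after, bad_state)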
diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h
index b6b66df..66588f6 100644
--- a/compiler/optimizing/graph_visualizer.h
+++ b/compiler/optimizing/graph_visualizer.h
@@ -104,7 +104,7 @@
                    const CodeGenerator& codegen);
 
   void PrintHeader(const char* method_name) const;
-  void DumpGraph(const char* pass_name, bool is_after_pass = true) const;
+  void DumpGraph(const char* pass_name, bool is_after_pass, bool graph_in_bad_state) const;
   void DumpGraphWithDisassembly() const;
 
  private:
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 708733e..3900646 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -120,7 +120,7 @@
   // Removes all instructions in the set affected by the given side effects.
   void Kill(SideEffects side_effects) {
     DeleteAllImpureWhich([side_effects](Node* node) {
-      return node->GetInstruction()->GetSideEffects().DependsOn(side_effects);
+      return node->GetInstruction()->GetSideEffects().MayDependOn(side_effects);
     });
   }
 
@@ -264,7 +264,7 @@
   // odd buckets to speed up deletion.
   size_t HashCode(HInstruction* instruction) const {
     size_t hash_code = instruction->ComputeHashCode();
-    if (instruction->GetSideEffects().HasDependencies()) {
+    if (instruction->GetSideEffects().DoesAnyRead()) {
       return (hash_code << 1) | 0;
     } else {
       return (hash_code << 1) | 1;
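
Both GVN changes follow from the read/write split: HashCode() keeps reading ("impure")
instructions in even buckets so Kill() never scans pure entries, and MayDependOn() makes
the kill type-aware. An illustrative check in the style of side_effects_test.cc further
below (test name hypothetical):

TEST(GvnSideEffectsTest, IntFieldWriteKillsOnlyMatchingReads) {
  SideEffects write = SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false);
  // An int field read may observe the write (so may float, due to I/F aliasing)...
  EXPECT_TRUE(SideEffects::FieldReadOfType(Primitive::kPrimInt, false).MayDependOn(write));
  EXPECT_TRUE(SideEffects::FieldReadOfType(Primitive::kPrimFloat, false).MayDependOn(write));
  // ...but an int array read cannot, so its value-set entry survives Kill(write).
  EXPECT_FALSE(SideEffects::ArrayReadOfType(Primitive::kPrimInt).MayDependOn(write));
}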
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index d8a09ff..5c6239b 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -206,7 +206,7 @@
   // and the body to be GVN'ed.
   loop_body->AddInstruction(new (&allocator) HInstanceFieldSet(parameter,
                                                                parameter,
-                                                               Primitive::kPrimNot,
+                                                               Primitive::kPrimBoolean,
                                                                MemberOffset(42),
                                                                false,
                                                                kUnknownFieldIndex,
@@ -323,9 +323,10 @@
     SideEffectsAnalysis side_effects(graph);
     side_effects.Run();
 
-    ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects());
-    ASSERT_FALSE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects());
-    ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects());
+    ASSERT_TRUE(side_effects.GetBlockEffects(entry).DoesAnyWrite());
+    ASSERT_FALSE(side_effects.GetBlockEffects(outer_loop_body).DoesAnyWrite());
+    ASSERT_FALSE(side_effects.GetLoopEffects(outer_loop_header).DoesAnyWrite());
+    ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).DoesAnyWrite());
   }
 
   // Check that the side effects of the outer loop does not affect the inner loop.
@@ -343,10 +344,10 @@
     SideEffectsAnalysis side_effects(graph);
     side_effects.Run();
 
-    ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects());
-    ASSERT_TRUE(side_effects.GetBlockEffects(outer_loop_body).HasSideEffects());
-    ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects());
-    ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects());
+    ASSERT_TRUE(side_effects.GetBlockEffects(entry).DoesAnyWrite());
+    ASSERT_TRUE(side_effects.GetBlockEffects(outer_loop_body).DoesAnyWrite());
+    ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).DoesAnyWrite());
+    ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).DoesAnyWrite());
   }
 
   // Check that the side effects of the inner loop affects the outer loop.
@@ -365,10 +366,10 @@
     SideEffectsAnalysis side_effects(graph);
     side_effects.Run();
 
-    ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects());
-    ASSERT_FALSE(side_effects.GetBlockEffects(outer_loop_body).HasSideEffects());
-    ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects());
-    ASSERT_TRUE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects());
+    ASSERT_TRUE(side_effects.GetBlockEffects(entry).DoesAnyWrite());
+    ASSERT_FALSE(side_effects.GetBlockEffects(outer_loop_body).DoesAnyWrite());
+    ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).DoesAnyWrite());
+    ASSERT_TRUE(side_effects.GetLoopEffects(inner_loop_header).DoesAnyWrite());
   }
 }
 }  // namespace art
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 860b199..1551c15 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -324,7 +324,8 @@
                         &outer_compilation_unit_,
                         resolved_method->GetDexFile(),
                         compiler_driver_,
-                        &inline_stats);
+                        &inline_stats,
+                        resolved_method->GetQuickenedInfo());
 
   if (!builder.BuildGraph(*code_item)) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 2535ea2..5b89b4e 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -115,7 +115,7 @@
         HInstruction* instruction = inst_it.Current();
         if (instruction->CanBeMoved()
             && (!instruction->CanThrow() || !found_first_non_hoisted_throwing_instruction_in_loop)
-            && !instruction->GetSideEffects().DependsOn(loop_effects)
+            && !instruction->GetSideEffects().MayDependOn(loop_effects)
             && InputsAreDefinedBeforeLoop(instruction)) {
           // We need to update the environment if the instruction has a loop header
           // phi in it.
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index e1856fa..296c1b0 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -98,26 +98,31 @@
 }
 
 void HGraph::BuildDominatorTree() {
+  // (1) Simplify the CFG so that catch blocks have only exceptional incoming
+  //     edges. This invariant simplifies building SSA form because Phis cannot
+  //     collect both normal- and exceptional-flow values at the same time.
+  SimplifyCatchBlocks();
+
   ArenaBitVector visited(arena_, blocks_.Size(), false);
 
-  // (1) Find the back edges in the graph doing a DFS traversal.
+  // (2) Find the back edges in the graph doing a DFS traversal.
   FindBackEdges(&visited);
 
-  // (2) Remove instructions and phis from blocks not visited during
+  // (3) Remove instructions and phis from blocks not visited during
   //     the initial DFS as users from other instructions, so that
   //     users can be safely removed before uses later.
   RemoveInstructionsAsUsersFromDeadBlocks(visited);
 
-  // (3) Remove blocks not visited during the initial DFS.
+  // (4) Remove blocks not visited during the initial DFS.
   //     Step (4) requires dead blocks to be removed from the
   //     predecessors list of live blocks.
   RemoveDeadBlocks(visited);
 
-  // (4) Simplify the CFG now, so that we don't need to recompute
+  // (5) Simplify the CFG now, so that we don't need to recompute
   //     dominators and the reverse post order.
   SimplifyCFG();
 
-  // (5) Compute the dominance information and the reverse post order.
+  // (6) Compute the dominance information and the reverse post order.
   ComputeDominanceInformation();
 }
 
@@ -261,6 +266,83 @@
   info->SetSuspendCheck(first_instruction->AsSuspendCheck());
 }
 
+static bool CheckIfPredecessorAtIsExceptional(const HBasicBlock& block, size_t pred_idx) {
+  HBasicBlock* predecessor = block.GetPredecessors().Get(pred_idx);
+  if (!predecessor->EndsWithTryBoundary()) {
+    // Only edges from HTryBoundary can be exceptional.
+    return false;
+  }
+  HTryBoundary* try_boundary = predecessor->GetLastInstruction()->AsTryBoundary();
+  if (try_boundary->GetNormalFlowSuccessor() == &block) {
+    // This block is the normal-flow successor of `try_boundary`, but it could
+    // also be one of its exception handlers if catch blocks have not been
+    // simplified yet. Predecessors are unordered, so we will consider the first
+    // occurrence to be the normal edge and a possible second occurrence to be
+    // the exceptional edge.
+    return !block.IsFirstIndexOfPredecessor(predecessor, pred_idx);
+  } else {
+    // This is not the normal-flow successor of `try_boundary`, hence it must be
+    // one of its exception handlers.
+    DCHECK(try_boundary->HasExceptionHandler(block));
+    return true;
+  }
+}
+
+void HGraph::SimplifyCatchBlocks() {
+  for (size_t i = 0; i < blocks_.Size(); ++i) {
+    HBasicBlock* catch_block = blocks_.Get(i);
+    if (!catch_block->IsCatchBlock()) {
+      continue;
+    }
+
+    bool exceptional_predecessors_only = true;
+    for (size_t j = 0; j < catch_block->GetPredecessors().Size(); ++j) {
+      if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
+        exceptional_predecessors_only = false;
+        break;
+      }
+    }
+
+    if (!exceptional_predecessors_only) {
+      // Catch block has normal-flow predecessors and needs to be simplified.
+      // Splitting the block before its first instruction moves all its
+      // instructions into `normal_block` and links the two blocks with a Goto.
+      // Afterwards, incoming normal-flow edges are re-linked to `normal_block`,
+      // leaving `catch_block` with the exceptional edges only.
+      // Note that catch blocks with normal-flow predecessors cannot begin with
+      // a MOVE_EXCEPTION instruction, as guaranteed by the verifier.
+      DCHECK(!catch_block->GetFirstInstruction()->IsLoadException());
+      HBasicBlock* normal_block = catch_block->SplitBefore(catch_block->GetFirstInstruction());
+      for (size_t j = 0; j < catch_block->GetPredecessors().Size(); ++j) {
+        if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
+          catch_block->GetPredecessors().Get(j)->ReplaceSuccessor(catch_block, normal_block);
+          --j;
+        }
+      }
+    }
+  }
+}
+
+void HGraph::ComputeTryBlockInformation() {
+  // Iterate in reverse post order to propagate try membership information from
+  // predecessors to their successors.
+  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (block->IsEntryBlock() || block->IsCatchBlock()) {
+      // Catch blocks after simplification have only exceptional predecessors
+      // and hence are never in tries.
+      continue;
+    }
+
+    // Infer try membership from the first predecessor. Having simplified loops,
+    // the first predecessor can never be a back edge and therefore it must have
+    // been visited already and had its try membership set.
+    HBasicBlock* first_predecessor = block->GetPredecessors().Get(0);
+    DCHECK(!block->IsLoopHeader() || !block->GetLoopInformation()->IsBackEdge(*first_predecessor));
+    block->SetTryEntry(first_predecessor->ComputeTryEntryOfSuccessors());
+  }
+}
+
 void HGraph::SimplifyCFG() {
   // Simplify the CFG for future analysis, and code generation:
   // (1): Split critical edges.
@@ -268,9 +350,10 @@
   for (size_t i = 0; i < blocks_.Size(); ++i) {
     HBasicBlock* block = blocks_.Get(i);
     if (block == nullptr) continue;
-    if (block->GetSuccessors().Size() > 1) {
+    if (block->NumberOfNormalSuccessors() > 1) {
       for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
         HBasicBlock* successor = block->GetSuccessors().Get(j);
+        DCHECK(!successor->IsCatchBlock());
         if (successor->GetPredecessors().Size() > 1) {
           SplitCriticalEdge(block, successor);
           --j;
@@ -288,6 +371,11 @@
   for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (block->IsLoopHeader()) {
+      if (block->IsCatchBlock()) {
+        // TODO: Dealing with exceptional back edges could be tricky because
+        //       they only approximate the real control flow. Bail out for now.
+        return false;
+      }
       HLoopInformation* info = block->GetLoopInformation();
       if (!info->Populate()) {
         // Abort if the loop is non natural. We currently bailout in such cases.
@@ -1086,10 +1174,20 @@
   return new_block;
 }
 
-bool HBasicBlock::IsExceptionalSuccessor(size_t idx) const {
-  return !GetInstructions().IsEmpty()
-      && GetLastInstruction()->IsTryBoundary()
-      && GetLastInstruction()->AsTryBoundary()->IsExceptionalSuccessor(idx);
+HTryBoundary* HBasicBlock::ComputeTryEntryOfSuccessors() const {
+  if (EndsWithTryBoundary()) {
+    HTryBoundary* try_boundary = GetLastInstruction()->AsTryBoundary();
+    if (try_boundary->IsEntry()) {
+      DCHECK(try_entry_ == nullptr);
+      return try_boundary;
+    } else {
+      DCHECK(try_entry_ != nullptr);
+      DCHECK(try_entry_->HasSameExceptionHandlersAs(*try_boundary));
+      return nullptr;
+    }
+  } else {
+    return try_entry_;
+  }
 }
 
 static bool HasOnlyOneInstruction(const HBasicBlock& block) {
@@ -1114,10 +1212,29 @@
   return !GetInstructions().IsEmpty() && GetLastInstruction()->IsIf();
 }
 
+bool HBasicBlock::EndsWithTryBoundary() const {
+  return !GetInstructions().IsEmpty() && GetLastInstruction()->IsTryBoundary();
+}
+
 bool HBasicBlock::HasSinglePhi() const {
   return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr;
 }
 
+bool HTryBoundary::HasSameExceptionHandlersAs(const HTryBoundary& other) const {
+  if (GetBlock()->GetSuccessors().Size() != other.GetBlock()->GetSuccessors().Size()) {
+    return false;
+  }
+
+  // Exception handler lists cannot contain duplicates, which makes it
+  // sufficient to test inclusion only in one direction.
+  for (HExceptionHandlerIterator it(other); !it.Done(); it.Advance()) {
+    if (!HasExceptionHandler(*it.Current())) {
+      return false;
+    }
+  }
+  return true;
+}
+
 size_t HInstructionList::CountSize() const {
   size_t size = 0;
   HInstruction* current = first_instruction_;
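
To illustrate SimplifyCatchBlocks() on a hypothetical CFG: let catch block B3 be
reachable both as an exception handler of a TryBoundary in B1 and by normal flow from B2.
Splitting B3 before its first instruction gives:

  before:  B1 --TryBoundary (exceptional)--> B3 (catch: instructions)
           B2 ---------- normal -----------> B3

  after:   B1 --TryBoundary (exceptional)--> B3 (catch: Goto only)
           B2 ---------- normal -----------> B3' (B3's instructions moved here)
                                B3 --Goto--> B3'

B3 is then left with exceptional predecessors only, which is the invariant the SSA
builder relies on when creating catch phis.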
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 2c7bac5..1190fae 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -49,6 +49,7 @@
 class HNullConstant;
 class HPhi;
 class HSuspendCheck;
+class HTryBoundary;
 class LiveInterval;
 class LocationSummary;
 class SlowPathCode;
@@ -182,6 +183,10 @@
     // visit for eliminating dead phis: a dead phi can only have loop header phi
     // users remaining when being visited.
     if (!AnalyzeNaturalLoops()) return false;
+    // Precompute per-block try membership before entering the SSA builder,
+    // which needs the information to build catch block phis from values of
+    // locals at throwing instructions inside try blocks.
+    ComputeTryBlockInformation();
     TransformToSsa();
     in_ssa_form_ = true;
     return true;
@@ -193,12 +198,17 @@
   void BuildDominatorTree();
   void TransformToSsa();
   void SimplifyCFG();
+  void SimplifyCatchBlocks();
 
   // Analyze all natural loops in this graph. Returns false if one
   // loop is not natural, that is the header does not dominate the
   // back edge.
   bool AnalyzeNaturalLoops() const;
 
+  // Iterate over blocks to compute try block membership. Needs reverse post
+  // order and loop information.
+  void ComputeTryBlockInformation();
+
   // Inline this graph in `outer_graph`, replacing the given `invoke` instruction.
   void InlineInto(HGraph* outer_graph, HInvoke* invoke);
 
@@ -730,8 +740,11 @@
     return GetPredecessorIndexOf(predecessor) == idx;
   }
 
-  // Returns whether successor at index `idx` is an exception handler.
-  bool IsExceptionalSuccessor(size_t idx) const;
+  // Returns the number of non-exceptional successors. SSAChecker ensures that
+  // these are stored at the beginning of the successor list.
+  size_t NumberOfNormalSuccessors() const {
+    return EndsWithTryBoundary() ? 1 : GetSuccessors().Size();
+  }
 
   // Split the block into two blocks just before `cursor`. Returns the newly
   // created, latter block. Note that this method will add the block to the
@@ -830,6 +843,15 @@
 
   bool IsInLoop() const { return loop_information_ != nullptr; }
 
+  HTryBoundary* GetTryEntry() const { return try_entry_; }
+  void SetTryEntry(HTryBoundary* try_entry) { try_entry_ = try_entry; }
+  bool IsInTry() const { return try_entry_ != nullptr; }
+
+  // Returns the try entry that this block's successors should have. They will
+  // be in the same try, unless the block ends in a try boundary. In that case,
+  // the appropriate try entry will be returned.
+  HTryBoundary* ComputeTryEntryOfSuccessors() const;
+
   // Returns whether this block dominates the blocked passed as parameter.
   bool Dominates(HBasicBlock* block) const;
 
@@ -846,6 +868,7 @@
 
   bool EndsWithControlFlowInstruction() const;
   bool EndsWithIf() const;
+  bool EndsWithTryBoundary() const;
   bool HasSinglePhi() const;
 
  private:
@@ -864,6 +887,10 @@
   size_t lifetime_end_;
   bool is_catch_block_;
 
+  // If this block is in a try block, `try_entry_` stores one of, possibly
+  // several, TryBoundary instructions entering it.
+  HTryBoundary* try_entry_;
+
   friend class HGraph;
   friend class HInstruction;
 
@@ -1155,13 +1182,25 @@
   HUseListNode<T>* use_node_;
 };
 
-// TODO: Add better documentation to this class and maybe refactor with more suggestive names.
-// - Has(All)SideEffects suggests that all the side effects are present but only ChangesSomething
-//   flag is consider.
-// - DependsOn suggests that there is a real dependency between side effects but it only
-//   checks DependendsOnSomething flag.
-//
-// Represents the side effects an instruction may have.
+/**
+ * Side-effects representation for write/read dependences on fields/arrays.
+ *
+ * The dependence analysis uses type disambiguation (e.g. a float field write
+ * cannot modify the value of an integer field read) and the access type (e.g.
+ * a reference array write cannot modify the value of a reference field read
+ * [although it may modify the reference fetch prior to reading the field,
+ * which is represented by its own write/read dependence]). The analysis
+ * makes conservative points-to assumptions on reference types (e.g. two same
+ * typed arrays are assumed to be the same, and any reference read depends
+ * on any reference read without further regard of its type).
+ *
+ * The internal representation uses the following 36-bit flags assignments:
+ *
+ *   |ARRAY-R  |FIELD-R  |ARRAY-W  |FIELD-W  |
+ *   +---------+---------+---------+---------+
+ *   |543210987|654321098|765432109|876543210|
+ *   |DFJISCBZL|DFJISCBZL|DFJISCBZL|DFJISCBZL|
+ */
 class SideEffects : public ValueObject {
  public:
   SideEffects() : flags_(0) {}
@@ -1171,57 +1210,125 @@
   }
 
   static SideEffects All() {
-    return SideEffects(ChangesSomething().flags_ | DependsOnSomething().flags_);
+    return SideEffects(kAllWrites | kAllReads);
   }
 
-  static SideEffects ChangesSomething() {
-    return SideEffects((1 << kFlagChangesCount) - 1);
+  static SideEffects AllWrites() {
+    return SideEffects(kAllWrites);
   }
 
-  static SideEffects DependsOnSomething() {
-    int count = kFlagDependsOnCount - kFlagChangesCount;
-    return SideEffects(((1 << count) - 1) << kFlagChangesCount);
+  static SideEffects AllReads() {
+    return SideEffects(kAllReads);
   }
 
+  static SideEffects FieldWriteOfType(Primitive::Type type, bool is_volatile) {
+    return is_volatile
+        ? All()
+        : SideEffects(TypeFlagWithAlias(type, kFieldWriteOffset));
+  }
+
+  static SideEffects ArrayWriteOfType(Primitive::Type type) {
+    return SideEffects(TypeFlagWithAlias(type, kArrayWriteOffset));
+  }
+
+  static SideEffects FieldReadOfType(Primitive::Type type, bool is_volatile) {
+    return is_volatile
+        ? All()
+        : SideEffects(TypeFlagWithAlias(type, kFieldReadOffset));
+  }
+
+  static SideEffects ArrayReadOfType(Primitive::Type type) {
+    return SideEffects(TypeFlagWithAlias(type, kArrayReadOffset));
+  }
+
+  // Combines the side-effects of this and the other.
   SideEffects Union(SideEffects other) const {
     return SideEffects(flags_ | other.flags_);
   }
 
-  bool HasSideEffects() const {
-    size_t all_bits_set = (1 << kFlagChangesCount) - 1;
-    return (flags_ & all_bits_set) != 0;
+  // Returns true if something is written.
+  bool DoesAnyWrite() const {
+    return (flags_ & kAllWrites);
   }
 
-  bool HasAllSideEffects() const {
-    size_t all_bits_set = (1 << kFlagChangesCount) - 1;
-    return all_bits_set == (flags_ & all_bits_set);
+  // Returns true if something is read.
+  bool DoesAnyRead() const {
+    return (flags_ & kAllReads);
   }
 
-  bool DependsOn(SideEffects other) const {
-    size_t depends_flags = other.ComputeDependsFlags();
-    return (flags_ & depends_flags) != 0;
+  // Returns true if nothing is written or read.
+  bool DoesNothing() const {
+    return flags_ == 0;
   }
 
-  bool HasDependencies() const {
-    int count = kFlagDependsOnCount - kFlagChangesCount;
-    size_t all_bits_set = (1 << count) - 1;
-    return ((flags_ >> kFlagChangesCount) & all_bits_set) != 0;
+  // Returns true if potentially everything is written and read
+  // (every type and every kind of access).
+  bool DoesAll() const {
+    return flags_ == (kAllWrites | kAllReads);
+  }
+
+  // Returns true if this may read something written by other.
+  bool MayDependOn(SideEffects other) const {
+    const uint64_t reads = (flags_ & kAllReads) >> kFieldReadOffset;
+    return (other.flags_ & reads);
+  }
+
+  // Returns string representation of flags (for debugging only).
+  // Format: |DFJISCBZL|DFJISCBZL|DFJISCBZL|DFJISCBZL|
+  std::string ToString() const {
+    static const char* kDebug = "LZBCSIJFD";
+    std::string flags = "|";
+    for (int s = 35; s >= 0; s--) {
+      const int t = s % kBits;
+      if ((flags_ >> s) & 1) {
+        flags += kDebug[t];
+      }
+      if (t == 0) {
+        flags += "|";
+      }
+    }
+    return flags;
   }
 
  private:
-  static constexpr int kFlagChangesSomething = 0;
-  static constexpr int kFlagChangesCount = kFlagChangesSomething + 1;
+  static constexpr int kBits = 9;
+  static constexpr int kFieldWriteOffset = 0 * kBits;
+  static constexpr int kArrayWriteOffset = 1 * kBits;
+  static constexpr int kFieldReadOffset  = 2 * kBits;
+  static constexpr int kArrayReadOffset  = 3 * kBits;
 
-  static constexpr int kFlagDependsOnSomething = kFlagChangesCount;
-  static constexpr int kFlagDependsOnCount = kFlagDependsOnSomething + 1;
+  static constexpr uint64_t kAllWrites = 0x0003ffff;
+  static constexpr uint64_t kAllReads  = kAllWrites << kFieldReadOffset;
 
-  explicit SideEffects(size_t flags) : flags_(flags) {}
-
-  size_t ComputeDependsFlags() const {
-    return flags_ << kFlagChangesCount;
+  // Work around the fact that HIR aliases I/F and J/D.
+  // TODO: remove this interceptor once HIR types are clean
+  static uint64_t TypeFlagWithAlias(Primitive::Type type, int offset) {
+    switch (type) {
+      case Primitive::kPrimInt:
+      case Primitive::kPrimFloat:
+        return TypeFlag(Primitive::kPrimInt, offset) |
+               TypeFlag(Primitive::kPrimFloat, offset);
+      case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
+        return TypeFlag(Primitive::kPrimLong, offset) |
+               TypeFlag(Primitive::kPrimDouble, offset);
+      default:
+        return TypeFlag(type, offset);
+    }
   }
 
-  size_t flags_;
+  // Translates type to bit flag.
+  static uint64_t TypeFlag(Primitive::Type type, int offset) {
+    CHECK_NE(type, Primitive::kPrimVoid);
+    const uint64_t one = 1;
+    const int shift = type;  // 0-based consecutive enum
+    DCHECK_LE(kFieldWriteOffset, shift);
+    DCHECK_LT(shift, kArrayWriteOffset);
+    return one << (shift + offset);
+  }
+
+  // Private constructor on direct flags value.
+  explicit SideEffects(uint64_t flags) : flags_(flags) {}
+
+  uint64_t flags_;
 };
 
 // A HEnvironment object contains the values of virtual registers at a given location.
@@ -1468,7 +1575,8 @@
   }
   virtual bool IsControlFlow() const { return false; }
   virtual bool CanThrow() const { return false; }
-  bool HasSideEffects() const { return side_effects_.HasSideEffects(); }
+
+  bool DoesAnyWrite() const { return side_effects_.DoesAnyWrite(); }
 
   // Does not apply for all instructions, but having this at top level greatly
   // simplifies the null check elimination.
@@ -1957,29 +2065,24 @@
 
   // Returns whether `handler` is among its exception handlers (non-zero index
   // successors).
-  bool HasExceptionHandler(HBasicBlock* handler) const {
-    DCHECK(handler->IsCatchBlock());
-    return GetBlock()->GetSuccessors().Contains(handler, /* start_from */ 1);
-  }
-
-  // Returns whether successor at index `idx` is an exception handler.
-  bool IsExceptionalSuccessor(size_t idx) const {
-    DCHECK_LT(idx, GetBlock()->GetSuccessors().Size());
-    bool is_handler = (idx != 0);
-    DCHECK(!is_handler || GetBlock()->GetSuccessors().Get(idx)->IsCatchBlock());
-    return is_handler;
+  bool HasExceptionHandler(const HBasicBlock& handler) const {
+    DCHECK(handler.IsCatchBlock());
+    return GetBlock()->GetSuccessors().Contains(
+        const_cast<HBasicBlock*>(&handler), /* start_from */ 1);
   }
 
   // If not present already, adds `handler` to its block's list of exception
   // handlers.
   void AddExceptionHandler(HBasicBlock* handler) {
-    if (!HasExceptionHandler(handler)) {
+    if (!HasExceptionHandler(*handler)) {
       GetBlock()->AddSuccessor(handler);
     }
   }
 
   bool IsEntry() const { return kind_ == BoundaryKind::kEntry; }
 
+  bool HasSameExceptionHandlersAs(const HTryBoundary& other) const;
+
   DECLARE_INSTRUCTION(TryBoundary);
 
  private:
@@ -1988,6 +2091,24 @@
   DISALLOW_COPY_AND_ASSIGN(HTryBoundary);
 };
 
+// Iterator over exception handlers of a given HTryBoundary, i.e. over
+// exceptional successors of its basic block.
+class HExceptionHandlerIterator : public ValueObject {
+ public:
+  explicit HExceptionHandlerIterator(const HTryBoundary& try_boundary)
+    : block_(*try_boundary.GetBlock()), index_(block_.NumberOfNormalSuccessors()) {}
+
+  bool Done() const { return index_ == block_.GetSuccessors().Size(); }
+  HBasicBlock* Current() const { return block_.GetSuccessors().Get(index_); }
+  size_t CurrentSuccessorIndex() const { return index_; }
+  void Advance() { ++index_; }
+
+ private:
+  const HBasicBlock& block_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HExceptionHandlerIterator);
+};
 
 // Deoptimize to interpreter, upon checking a condition.
 class HDeoptimize : public HTemplateInstruction<1> {
@@ -2674,7 +2795,7 @@
           uint32_t dex_pc,
           uint32_t dex_method_index,
           InvokeType original_invoke_type)
-    : HInstruction(SideEffects::All()),
+    : HInstruction(SideEffects::All()),  // assume write/read on all fields/arrays
       number_of_arguments_(number_of_arguments),
       inputs_(arena, number_of_arguments),
       return_type_(return_type),
@@ -3349,6 +3470,8 @@
     }
   }
 
+  bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); }
+
   size_t InputCount() const OVERRIDE { return inputs_.Size(); }
 
   void AddInput(HInstruction* input);
@@ -3464,7 +3587,9 @@
                     bool is_volatile,
                     uint32_t field_idx,
                     const DexFile& dex_file)
-      : HExpression(field_type, SideEffects::DependsOnSomething()),
+      : HExpression(
+            field_type,
+            SideEffects::FieldReadOfType(field_type, is_volatile)),
         field_info_(field_offset, field_type, is_volatile, field_idx, dex_file) {
     SetRawInputAt(0, value);
   }
@@ -3506,7 +3631,8 @@
                     bool is_volatile,
                     uint32_t field_idx,
                     const DexFile& dex_file)
-      : HTemplateInstruction(SideEffects::ChangesSomething()),
+      : HTemplateInstruction(
+          SideEffects::FieldWriteOfType(field_type, is_volatile)),
         field_info_(field_offset, field_type, is_volatile, field_idx, dex_file),
         value_can_be_null_(true) {
     SetRawInputAt(0, object);
@@ -3537,7 +3663,7 @@
 class HArrayGet : public HExpression<2> {
  public:
   HArrayGet(HInstruction* array, HInstruction* index, Primitive::Type type)
-      : HExpression(type, SideEffects::DependsOnSomething()) {
+      : HExpression(type, SideEffects::ArrayReadOfType(type)) {
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
   }
@@ -3575,7 +3701,7 @@
             HInstruction* value,
             Primitive::Type expected_component_type,
             uint32_t dex_pc)
-      : HTemplateInstruction(SideEffects::ChangesSomething()),
+      : HTemplateInstruction(SideEffects::ArrayWriteOfType(expected_component_type)),
         dex_pc_(dex_pc),
         expected_component_type_(expected_component_type),
         needs_type_check_(value->GetType() == Primitive::kPrimNot),
@@ -3870,7 +3996,9 @@
 class HClinitCheck : public HExpression<1> {
  public:
   explicit HClinitCheck(HLoadClass* constant, uint32_t dex_pc)
-      : HExpression(Primitive::kPrimNot, SideEffects::ChangesSomething()),
+      : HExpression(
+          Primitive::kPrimNot,
+          SideEffects::AllWrites()),  // assume write on all fields/arrays
         dex_pc_(dex_pc) {
     SetRawInputAt(0, constant);
   }
@@ -3906,7 +4034,9 @@
                   bool is_volatile,
                   uint32_t field_idx,
                   const DexFile& dex_file)
-      : HExpression(field_type, SideEffects::DependsOnSomething()),
+      : HExpression(
+            field_type,
+            SideEffects::FieldReadOfType(field_type, is_volatile)),
         field_info_(field_offset, field_type, is_volatile, field_idx, dex_file) {
     SetRawInputAt(0, cls);
   }
@@ -3945,7 +4075,8 @@
                   bool is_volatile,
                   uint32_t field_idx,
                   const DexFile& dex_file)
-      : HTemplateInstruction(SideEffects::ChangesSomething()),
+      : HTemplateInstruction(
+          SideEffects::FieldWriteOfType(field_type, is_volatile)),
         field_info_(field_offset, field_type, is_volatile, field_idx, dex_file),
         value_can_be_null_(true) {
     SetRawInputAt(0, cls);
@@ -4135,7 +4266,8 @@
 class HMemoryBarrier : public HTemplateInstruction<0> {
  public:
   explicit HMemoryBarrier(MemBarrierKind barrier_kind)
-      : HTemplateInstruction(SideEffects::None()),
+      : HTemplateInstruction(
+          SideEffects::All()),  // assume write/read on all fields/arrays
         barrier_kind_(barrier_kind) {}
 
   MemBarrierKind GetBarrierKind() { return barrier_kind_; }
@@ -4156,7 +4288,8 @@
   };
 
   HMonitorOperation(HInstruction* object, OperationKind kind, uint32_t dex_pc)
-    : HTemplateInstruction(SideEffects::ChangesSomething()), kind_(kind), dex_pc_(dex_pc) {
+    : HTemplateInstruction(SideEffects::All()),  // assume write/read on all fields/arrays
+      kind_(kind), dex_pc_(dex_pc) {
     SetRawInputAt(0, object);
   }
 
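For reference, a standalone sketch of the 36-bit layout documented above (constants
copied from this file; type index 5 is "I" in the L,Z,B,C,S,I,J,F,D column order).
MayDependOn() lines the read half up with the write half by shifting it down 18 bits:

#include <cassert>
#include <cstdint>

constexpr int kBits = 9;
constexpr int kFieldWriteOffset = 0 * kBits;  // bits  0..8
constexpr int kArrayWriteOffset = 1 * kBits;  // bits  9..17
constexpr int kFieldReadOffset  = 2 * kBits;  // bits 18..26
constexpr int kArrayReadOffset  = 3 * kBits;  // bits 27..35
constexpr uint64_t kAllWrites = 0x0003ffff;
constexpr uint64_t kAllReads  = kAllWrites << kFieldReadOffset;

// Field reads align with field writes, array reads with array writes.
bool MayDependOn(uint64_t flags, uint64_t other_flags) {
  const uint64_t reads = (flags & kAllReads) >> kFieldReadOffset;
  return (other_flags & reads) != 0;
}

int main() {
  const uint64_t int_field_write = uint64_t{1} << (5 + kFieldWriteOffset);
  const uint64_t int_array_write = uint64_t{1} << (5 + kArrayWriteOffset);
  const uint64_t int_field_read  = uint64_t{1} << (5 + kFieldReadOffset);
  assert(MayDependOn(int_field_read, int_field_write));   // same type and access kind
  assert(!MayDependOn(int_field_read, int_array_write));  // different access kind
  return 0;
}
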
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index bc56546..f793a65 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -40,7 +40,7 @@
   // Return the name of the pass.
   const char* GetPassName() const { return pass_name_; }
 
-  // Peform the analysis itself.
+  // Perform the analysis itself.
   virtual void Run() = 0;
 
  protected:
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index ae1958a..601d668 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -35,6 +35,7 @@
 #include "dex/verified_method.h"
 #include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
+#include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
 #include "driver/dex_compilation_unit.h"
 #include "elf_writer_quick.h"
@@ -132,7 +133,7 @@
   void StartPass(const char* pass_name) {
     // Dump graph first, then start timer.
     if (visualizer_enabled_) {
-      visualizer_.DumpGraph(pass_name, /* is_after_pass */ false);
+      visualizer_.DumpGraph(pass_name, /* is_after_pass */ false, graph_in_bad_state_);
     }
     if (timing_logger_enabled_) {
       timing_logger_.StartTiming(pass_name);
@@ -145,7 +146,7 @@
       timing_logger_.EndTiming();
     }
     if (visualizer_enabled_) {
-      visualizer_.DumpGraph(pass_name, /* is_after_pass */ true);
+      visualizer_.DumpGraph(pass_name, /* is_after_pass */ true, graph_in_bad_state_);
     }
 
     // Validate the HGraph if running in debug mode.
@@ -556,8 +557,8 @@
   }
 
   // Implementation of the space filter: do not compile a code item whose size in
-  // code units is bigger than 256.
-  static constexpr size_t kSpaceFilterOptimizingThreshold = 256;
+  // code units is bigger than 128.
+  static constexpr size_t kSpaceFilterOptimizingThreshold = 128;
   const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions();
   if ((compiler_options.GetCompilerFilter() == CompilerOptions::kSpace)
       && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) {
@@ -566,7 +567,7 @@
   }
 
   DexCompilationUnit dex_compilation_unit(
-    nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item,
+    nullptr, class_loader, Runtime::Current()->GetClassLinker(), dex_file, code_item,
     class_def_idx, method_idx, access_flags,
     compiler_driver->GetVerifiedMethod(&dex_file, method_idx));
 
@@ -603,12 +604,29 @@
                              visualizer_output_.get(),
                              compiler_driver);
 
+  const uint8_t* interpreter_metadata = nullptr;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<4> hs(soa.Self());
+    ClassLinker* class_linker = dex_compilation_unit.GetClassLinker();
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file)));
+    Handle<mirror::ClassLoader> loader(hs.NewHandle(
+        soa.Decode<mirror::ClassLoader*>(class_loader)));
+    ArtMethod* art_method = compiler_driver->ResolveMethod(
+        soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type);
+    // We may not get a method, for example if its class is erroneous.
+    // TODO: Clean this up, the compiler driver should just pass the ArtMethod to compile.
+    if (art_method != nullptr) {
+      interpreter_metadata = art_method->GetQuickenedInfo();
+    }
+  }
   HGraphBuilder builder(graph,
                         &dex_compilation_unit,
                         &dex_compilation_unit,
                         &dex_file,
                         compiler_driver,
-                        compilation_stats_.get());
+                        compilation_stats_.get(),
+                        interpreter_metadata);
 
   VLOG(compiler) << "Building " << method_name;
 
@@ -629,7 +647,7 @@
   // or the debuggable flag). If it is set, we can run baseline. Otherwise, we fall back
   // to Quick.
   bool can_use_baseline = !run_optimizations_ && builder.CanUseBaselineForStringInit();
-  if (run_optimizations_ && can_optimize && can_allocate_registers) {
+  if (run_optimizations_ && can_allocate_registers) {
     VLOG(compiler) << "Optimizing " << method_name;
 
     {
@@ -638,16 +656,21 @@
         // We could not transform the graph to SSA, bailout.
         LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop";
         MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA);
+        pass_observer.SetGraphInBadState();
         return nullptr;
       }
     }
 
-    return CompileOptimized(graph,
-                            codegen.get(),
-                            compiler_driver,
-                            dex_compilation_unit,
-                            &pass_observer);
-  } else if (shouldOptimize && can_allocate_registers) {
+    if (can_optimize) {
+      return CompileOptimized(graph,
+                              codegen.get(),
+                              compiler_driver,
+                              dex_compilation_unit,
+                              &pass_observer);
+    }
+  }
+
+  if (shouldOptimize && can_allocate_registers) {
     LOG(FATAL) << "Could not allocate registers in optimizing compiler";
     UNREACHABLE();
   } else if (can_use_baseline) {
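
After this restructuring the method-level decision reads, schematically: if
run_optimizations_ holds and registers can be allocated, build SSA (bailing out and
flagging the graph as bad on failure) and, only when can_optimize also holds, emit
optimized code; otherwise control falls through to the fatal shouldOptimize check or to
the baseline path, as before. The practical difference is that a method that can be
register-allocated but not optimized no longer skips the SSA attempt outright.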
diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc
index ea1ca5a..9dbf638 100644
--- a/compiler/optimizing/side_effects_analysis.cc
+++ b/compiler/optimizing/side_effects_analysis.cc
@@ -24,14 +24,15 @@
   block_effects_.SetSize(graph_->GetBlocks().Size());
   loop_effects_.SetSize(graph_->GetBlocks().Size());
 
+  // In DEBUG mode, ensure side effects are properly initialized to empty.
   if (kIsDebugBuild) {
     for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
       HBasicBlock* block = it.Current();
       SideEffects effects = GetBlockEffects(block);
-      DCHECK(!effects.HasSideEffects() && !effects.HasDependencies());
+      DCHECK(effects.DoesNothing());
       if (block->IsLoopHeader()) {
         effects = GetLoopEffects(block);
-        DCHECK(!effects.HasSideEffects() && !effects.HasDependencies());
+        DCHECK(effects.DoesNothing());
       }
     }
   }
@@ -46,7 +47,9 @@
          inst_it.Advance()) {
       HInstruction* instruction = inst_it.Current();
       effects = effects.Union(instruction->GetSideEffects());
-      if (effects.HasAllSideEffects()) {
+      // If every possible write/read is represented, scanning further
+      // will not add any more information to the side effects of this block.
+      if (effects.DoesAll()) {
         break;
       }
     }
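
DoesAll() gives the per-block scan an early exit: once the running union covers every tracked read and write, no later instruction can change the result. A minimal sketch of the same loop over plain bitmasks (kAllEffects and the 40-bit width are illustrative assumptions, not the real encoding):

    #include <cstdint>
    #include <vector>

    // Illustrative flag universe; the real SideEffects layout is typed per access kind.
    constexpr uint64_t kAllEffects = (uint64_t{1} << 40) - 1u;

    uint64_t ComputeBlockEffects(const std::vector<uint64_t>& instruction_effects) {
      uint64_t effects = 0u;
      for (uint64_t e : instruction_effects) {
        effects |= e;  // Union(), as in the pass above.
        if (effects == kAllEffects) {
          break;  // DoesAll(): nothing left to learn from this block.
        }
      }
      return effects;
    }
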
diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc
new file mode 100644
index 0000000..8db5a8a
--- /dev/null
+++ b/compiler/optimizing/side_effects_test.cc
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+#include "nodes.h"
+#include "primitive.h"
+
+namespace art {
+
+/**
+ * Tests for the SideEffects class.
+ */
+
+//
+// Helper methods.
+//
+
+void testWriteAndReadSanity(SideEffects write, SideEffects read) {
+  EXPECT_FALSE(write.DoesNothing());
+  EXPECT_FALSE(read.DoesNothing());
+
+  EXPECT_TRUE(write.DoesAnyWrite());
+  EXPECT_FALSE(write.DoesAnyRead());
+  EXPECT_FALSE(read.DoesAnyWrite());
+  EXPECT_TRUE(read.DoesAnyRead());
+
+  // All-dependences.
+  SideEffects all = SideEffects::All();
+  EXPECT_TRUE(all.MayDependOn(write));
+  EXPECT_FALSE(write.MayDependOn(all));
+  EXPECT_FALSE(all.MayDependOn(read));
+  EXPECT_TRUE(read.MayDependOn(all));
+
+  // None-dependences.
+  SideEffects none = SideEffects::None();
+  EXPECT_FALSE(none.MayDependOn(write));
+  EXPECT_FALSE(write.MayDependOn(none));
+  EXPECT_FALSE(none.MayDependOn(read));
+  EXPECT_FALSE(read.MayDependOn(none));
+}
+
+void testWriteAndReadDependence(SideEffects write, SideEffects read) {
+  testWriteAndReadSanity(write, read);
+
+  // Dependence only in one direction.
+  EXPECT_FALSE(write.MayDependOn(read));
+  EXPECT_TRUE(read.MayDependOn(write));
+}
+
+void testNoWriteAndReadDependence(SideEffects write, SideEffects read) {
+  testWriteAndReadSanity(write, read);
+
+  // No dependence in any direction.
+  EXPECT_FALSE(write.MayDependOn(read));
+  EXPECT_FALSE(read.MayDependOn(write));
+}
+
+//
+// Actual tests.
+//
+
+TEST(SideEffectsTest, All) {
+  SideEffects all = SideEffects::All();
+  EXPECT_TRUE(all.DoesAnyWrite());
+  EXPECT_TRUE(all.DoesAnyRead());
+  EXPECT_FALSE(all.DoesNothing());
+  EXPECT_TRUE(all.DoesAll());
+}
+
+TEST(SideEffectsTest, None) {
+  SideEffects none = SideEffects::None();
+  EXPECT_FALSE(none.DoesAnyWrite());
+  EXPECT_FALSE(none.DoesAnyRead());
+  EXPECT_TRUE(none.DoesNothing());
+  EXPECT_FALSE(none.DoesAll());
+}
+
+TEST(SideEffectsTest, DependencesAndNoDependences) {
+  // Apply test to each individual primitive type.
+  for (Primitive::Type type = Primitive::kPrimNot;
+      type < Primitive::kPrimVoid;
+      type = Primitive::Type(type + 1)) {
+    // Same primitive type and access type: proper write/read dep.
+    testWriteAndReadDependence(
+        SideEffects::FieldWriteOfType(type, false),
+        SideEffects::FieldReadOfType(type, false));
+    testWriteAndReadDependence(
+        SideEffects::ArrayWriteOfType(type),
+        SideEffects::ArrayReadOfType(type));
+    // Same primitive type but different access type: no write/read dep.
+    testNoWriteAndReadDependence(
+        SideEffects::FieldWriteOfType(type, false),
+        SideEffects::ArrayReadOfType(type));
+    testNoWriteAndReadDependence(
+        SideEffects::ArrayWriteOfType(type),
+        SideEffects::FieldReadOfType(type, false));
+  }
+}
+
+TEST(SideEffectsTest, NoDependences) {
+  // Different primitive type, same access type: no write/read dep.
+  testNoWriteAndReadDependence(
+      SideEffects::FieldWriteOfType(Primitive::kPrimInt, false),
+      SideEffects::FieldReadOfType(Primitive::kPrimDouble, false));
+  testNoWriteAndReadDependence(
+      SideEffects::ArrayWriteOfType(Primitive::kPrimInt),
+      SideEffects::ArrayReadOfType(Primitive::kPrimDouble));
+  // Everything different: no write/read dep.
+  testNoWriteAndReadDependence(
+      SideEffects::FieldWriteOfType(Primitive::kPrimInt, false),
+      SideEffects::ArrayReadOfType(Primitive::kPrimDouble));
+  testNoWriteAndReadDependence(
+      SideEffects::ArrayWriteOfType(Primitive::kPrimInt),
+      SideEffects::FieldReadOfType(Primitive::kPrimDouble, false));
+}
+
+TEST(SideEffectsTest, VolatileDependences) {
+  SideEffects volatile_write =
+      SideEffects::FieldWriteOfType(Primitive::kPrimInt, true);
+  SideEffects any_write =
+      SideEffects::FieldWriteOfType(Primitive::kPrimInt, false);
+  SideEffects volatile_read =
+      SideEffects::FieldReadOfType(Primitive::kPrimByte, true);
+  SideEffects any_read =
+      SideEffects::FieldReadOfType(Primitive::kPrimByte, false);
+
+  EXPECT_FALSE(volatile_write.MayDependOn(any_read));
+  EXPECT_TRUE(any_read.MayDependOn(volatile_write));
+  EXPECT_TRUE(volatile_write.MayDependOn(any_write));
+  EXPECT_FALSE(any_write.MayDependOn(volatile_write));
+
+  EXPECT_FALSE(volatile_read.MayDependOn(any_read));
+  EXPECT_TRUE(any_read.MayDependOn(volatile_read));
+  EXPECT_TRUE(volatile_read.MayDependOn(any_write));
+  EXPECT_FALSE(any_write.MayDependOn(volatile_read));
+}
+
+TEST(SideEffectsTest, SameWidthTypes) {
+  // Type I/F.
+  testWriteAndReadDependence(
+      SideEffects::FieldWriteOfType(Primitive::kPrimInt, false),
+      SideEffects::FieldReadOfType(Primitive::kPrimFloat, false));
+  testWriteAndReadDependence(
+      SideEffects::ArrayWriteOfType(Primitive::kPrimInt),
+      SideEffects::ArrayReadOfType(Primitive::kPrimFloat));
+  // Type L/D.
+  testWriteAndReadDependence(
+      SideEffects::FieldWriteOfType(Primitive::kPrimLong, false),
+      SideEffects::FieldReadOfType(Primitive::kPrimDouble, false));
+  testWriteAndReadDependence(
+      SideEffects::ArrayWriteOfType(Primitive::kPrimLong),
+      SideEffects::ArrayReadOfType(Primitive::kPrimDouble));
+}
+
+TEST(SideEffectsTest, AllWritesAndReads) {
+  SideEffects s = SideEffects::None();
+  // Keep taking the union of different writes and reads.
+  for (Primitive::Type type = Primitive::kPrimNot;
+      type < Primitive::kPrimVoid;
+      type = Primitive::Type(type + 1)) {
+    s = s.Union(SideEffects::FieldWriteOfType(type, false));
+    s = s.Union(SideEffects::ArrayWriteOfType(type));
+    s = s.Union(SideEffects::FieldReadOfType(type, false));
+    s = s.Union(SideEffects::ArrayReadOfType(type));
+  }
+  EXPECT_TRUE(s.DoesAll());
+}
+
+TEST(SideEffectsTest, BitStrings) {
+  EXPECT_STREQ(
+      "|||||",
+      SideEffects::None().ToString().c_str());
+  EXPECT_STREQ(
+      "|DFJISCBZL|DFJISCBZL|DFJISCBZL|DFJISCBZL|",
+      SideEffects::All().ToString().c_str());
+  EXPECT_STREQ(
+      "|||DFJISCBZL|DFJISCBZL|",
+      SideEffects::AllWrites().ToString().c_str());
+  EXPECT_STREQ(
+      "|DFJISCBZL|DFJISCBZL|||",
+      SideEffects::AllReads().ToString().c_str());
+  EXPECT_STREQ(
+      "||||L|",
+      SideEffects::FieldWriteOfType(Primitive::kPrimNot, false).ToString().c_str());
+  EXPECT_STREQ(
+      "|||Z||",
+      SideEffects::ArrayWriteOfType(Primitive::kPrimBoolean).ToString().c_str());
+  EXPECT_STREQ(
+      "||B|||",
+      SideEffects::FieldReadOfType(Primitive::kPrimByte, false).ToString().c_str());
+  EXPECT_STREQ(
+      "|DJ||||",  // note: DJ alias.
+      SideEffects::ArrayReadOfType(Primitive::kPrimDouble).ToString().c_str());
+  SideEffects s = SideEffects::None();
+  s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, false));
+  s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, false));
+  s = s.Union(SideEffects::ArrayWriteOfType(Primitive::kPrimShort));
+  s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, false));
+  s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimFloat));
+  s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimDouble));
+  EXPECT_STREQ(
+      "|DFJI|FI|S|DJC|",   // note: DJ/FI alias.
+      s.ToString().c_str());
+}
+
+}  // namespace art
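
The tests above pin down the dependence model: a read may depend on a write exactly when both touch the same (storage kind, type) slot, with double/long and float/int sharing a slot because same-width types alias. A minimal sketch of that rule (MiniEffects and the slot numbering are hypothetical; volatile handling is omitted):

    #include <cassert>
    #include <cstdint>

    struct MiniEffects {
      uint64_t writes;
      uint64_t reads;

      // "May I depend on o?": I read something o may have written.
      bool MayDependOn(MiniEffects o) const {
        return (reads & o.writes) != 0u;
      }
    };

    // One bit per (storage kind, type) slot; same-width types share a slot,
    // which is the DJ (double/long) and FI (float/int) aliasing noted above.
    enum Slot {
      kFieldIntFloat = 0,    // FI alias
      kFieldLongDouble = 1,  // DJ alias
      kArrayIntFloat = 2,
    };

    MiniEffects Write(Slot s) { return MiniEffects{uint64_t{1} << s, 0u}; }
    MiniEffects Read(Slot s) { return MiniEffects{0u, uint64_t{1} << s}; }

    int main() {
      assert(Read(kFieldIntFloat).MayDependOn(Write(kFieldIntFloat)));   // same width
      assert(!Read(kArrayIntFloat).MayDependOn(Write(kFieldIntFloat)));  // storage differs
      assert(!Write(kFieldIntFloat).MayDependOn(Read(kFieldIntFloat)));  // writes don't depend on reads
      return 0;
    }
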
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index c37b199..ff2e6ad 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -350,7 +350,9 @@
 void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
   current_locals_ = GetLocalsFor(block);
 
-  if (block->IsLoopHeader()) {
+  if (block->IsCatchBlock()) {
+    // Catch phis were already created and inputs collected from throwing sites.
+  } else if (block->IsLoopHeader()) {
     // If the block is a loop header, we know we only have visited the pre header
     // because we are visiting in reverse post order. We create phis for all initialized
     // locals from the pre header. Their inputs will be populated at the end of
@@ -551,19 +553,32 @@
 }
 
 void SsaBuilder::VisitInstruction(HInstruction* instruction) {
-  if (!instruction->NeedsEnvironment()) {
-    return;
+  if (instruction->NeedsEnvironment()) {
+    HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
+        GetGraph()->GetArena(),
+        current_locals_->Size(),
+        GetGraph()->GetDexFile(),
+        GetGraph()->GetMethodIdx(),
+        instruction->GetDexPc(),
+        GetGraph()->GetInvokeType(),
+        instruction);
+    environment->CopyFrom(*current_locals_);
+    instruction->SetRawEnvironment(environment);
   }
-  HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
-      GetGraph()->GetArena(),
-      current_locals_->Size(),
-      GetGraph()->GetDexFile(),
-      GetGraph()->GetMethodIdx(),
-      instruction->GetDexPc(),
-      GetGraph()->GetInvokeType(),
-      instruction);
-  environment->CopyFrom(*current_locals_);
-  instruction->SetRawEnvironment(environment);
+
+  // If in a try block, propagate values of locals into catch blocks.
+  if (instruction->GetBlock()->IsInTry() && instruction->CanThrow()) {
+    HTryBoundary* try_block = instruction->GetBlock()->GetTryEntry();
+    for (HExceptionHandlerIterator it(*try_block); !it.Done(); it.Advance()) {
+      GrowableArray<HInstruction*>* handler_locals = GetLocalsFor(it.Current());
+      for (size_t i = 0, e = current_locals_->Size(); i < e; ++i) {
+        HInstruction* local_value = current_locals_->Get(i);
+        if (local_value != nullptr) {
+          handler_locals->Get(i)->AsPhi()->AddInput(local_value);
+        }
+      }
+    }
+  }
 }
 
 void SsaBuilder::VisitTemporary(HTemporary* temp) {
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 1c83c4b..64600db 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -61,9 +61,22 @@
   GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) {
     GrowableArray<HInstruction*>* locals = locals_for_.Get(block->GetBlockId());
     if (locals == nullptr) {
-      locals = new (GetGraph()->GetArena()) GrowableArray<HInstruction*>(
-          GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs());
-      locals->SetSize(GetGraph()->GetNumberOfVRegs());
+      const size_t vregs = GetGraph()->GetNumberOfVRegs();
+      ArenaAllocator* arena = GetGraph()->GetArena();
+      locals = new (arena) GrowableArray<HInstruction*>(arena, vregs);
+      locals->SetSize(vregs);
+
+      if (block->IsCatchBlock()) {
+        // We record incoming inputs of catch phis at throwing instructions and
+        // must therefore eagerly create the phis. Unused phis will be removed
+        // in the dead phi analysis.
+        for (size_t i = 0; i < vregs; ++i) {
+          HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid);
+          block->AddPhi(phi);
+          locals->Put(i, phi);
+        }
+      }
+
       locals_for_.Put(block->GetBlockId(), locals);
     }
     return locals;
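
The scheme in the two hunks above: each catch block eagerly gets one phi per vreg, and every throwing instruction inside a try appends the current value of each initialized vreg as one more input to the handler's phi. A minimal C++ sketch with hypothetical Value/Phi types:

    #include <cstddef>
    #include <vector>

    struct Value {
      virtual ~Value() {}
    };
    struct Phi : Value {
      std::vector<Value*> inputs;
    };

    // One eagerly created phi per vreg of a catch block (cf. GetLocalsFor above).
    using HandlerLocals = std::vector<Phi*>;

    // At a throwing instruction inside a try: snapshot the locals into every
    // handler, one extra phi input per known local value.
    void RecordThrowSite(const std::vector<Value*>& current_locals,
                         const std::vector<HandlerLocals*>& handlers) {
      for (HandlerLocals* handler : handlers) {
        for (size_t i = 0, e = current_locals.size(); i < e; ++i) {
          if (current_locals[i] != nullptr) {
            (*handler)[i]->inputs.push_back(current_locals[i]);
          }
        }
      }
    }
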
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 2f2e2d1..917341a 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -114,6 +114,12 @@
       continue;
     }
 
+    if (phi->InputCount() == 0) {
+      DCHECK(phi->IsCatchPhi());
+      DCHECK(phi->IsDead());
+      continue;
+    }
+
     // Find if the inputs of the phi are the same instruction.
     HInstruction* candidate = phi->InputAt(0);
    // A loop phi cannot have itself as its first input. Note that this
@@ -137,6 +143,11 @@
       continue;
     }
 
+    // The candidate may not dominate a phi in a catch block.
+    if (phi->IsCatchPhi() && !candidate->StrictlyDominates(phi)) {
+      continue;
+    }
+
     if (phi->IsInLoop()) {
       // Because we're updating the users of this phi, we may have new
      // candidate phis for elimination if this phi is in a loop. Add phis that
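
The new dominance guard matters because catch phi inputs arrive from throw sites rather than predecessor edges, so a phi whose inputs are all one and the same instruction still cannot be blindly replaced: that instruction need not dominate the handler. A hypothetical shape of the problem at the source level:

    int f();             // assumption: some value-producing call
    void MayThrow(int);  // assumption: the only throwing instruction in the try

    void Example(bool flag) {
      int y = 0;
      try {
        if (flag) {
          y = f();
          MayThrow(y);  // sole throw site: the catch phi for y records f() here
        }
      } catch (...) {
        // The catch phi for y has the single input f(), but f()'s block does
        // not dominate the handler, so the phi cannot be replaced by f().
        (void)y;
      }
    }
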
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 65610d5..1f1530f 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -248,7 +248,7 @@
   DCHECK_EQ(code_info.GetStackMapsSize(code_info.ExtractEncoding()), stack_maps_size_);
 
   // Set the Dex register location catalog.
-  code_info.SetNumberOfDexRegisterLocationCatalogEntries(location_catalog_entries_.Size());
+  code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.Size());
   MemoryRegion dex_register_location_catalog_region = region.Subregion(
       dex_register_location_catalog_start_, dex_register_location_catalog_size_);
   DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region);
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index b4ac1b4..33207d9 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -55,8 +55,7 @@
   ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask());
   ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
 
-  uint32_t number_of_location_catalog_entries =
-      code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
   ASSERT_EQ(2u, number_of_location_catalog_entries);
   DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
   // The Dex register location catalog contains:
@@ -154,8 +153,7 @@
   ASSERT_EQ(2u, encoding.NumberOfBytesForStackMask());
   ASSERT_EQ(2u, code_info.GetNumberOfStackMaps());
 
-  uint32_t number_of_location_catalog_entries =
-      code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
   ASSERT_EQ(4u, number_of_location_catalog_entries);
   DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
   // The Dex register location catalog contains:
@@ -304,8 +302,7 @@
   ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask());
   ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
 
-  uint32_t number_of_location_catalog_entries =
-      code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
   ASSERT_EQ(1u, number_of_location_catalog_entries);
   DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
   // The Dex register location catalog contains:
@@ -398,8 +395,7 @@
   // The location catalog contains two entries (DexRegisterLocation(kConstant, 0)
   // and DexRegisterLocation(kConstant, 1)), therefore the location catalog index
   // has a size of 1 bit.
-  uint32_t number_of_location_catalog_entries =
-      code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
   ASSERT_EQ(2u, number_of_location_catalog_entries);
   ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_location_catalog_entries));
 
@@ -501,8 +497,7 @@
   ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask());
   ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
 
-  uint32_t number_of_location_catalog_entries =
-      code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
   ASSERT_EQ(0u, number_of_location_catalog_entries);
   DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
   ASSERT_EQ(0u, location_catalog.Size());
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 09d2270..0e3e08c 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -252,11 +252,11 @@
   if (offset_ < 0) {
     int32_t off = -offset_;
     CHECK_LT(off, 1024);
-    CHECK_EQ((off & 3 /* 0b11 */), 0);    // Must be multiple of 4.
+    CHECK_ALIGNED(off, 4);
     encoding = (am ^ (1 << kUShift)) | off >> 2;  // Flip U to adjust sign.
   } else {
     CHECK_LT(offset_, 1024);
-    CHECK_EQ((offset_ & 3 /* 0b11 */), 0);    // Must be multiple of 4.
+    CHECK_ALIGNED(offset_, 4);
     encoding =  am | offset_ >> 2;
   }
   encoding |= static_cast<uint32_t>(rn_) << 16;
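
CHECK_ALIGNED states the intent directly; for a power-of-two alignment it is equivalent to the old hand-written mask test. A one-line model of the equivalence (IsAlignedTo is an illustrative name, not the runtime's helper):

    #include <cstdint>

    constexpr bool IsAlignedTo(uint32_t value, uint32_t alignment) {  // power-of-two alignment
      return (value & (alignment - 1u)) == 0u;  // same as value % alignment == 0
    }
    static_assert(IsAlignedTo(8u, 4u), "8 is a multiple of 4");
    static_assert(!IsAlignedTo(6u, 4u), "6 & 0b11 != 0");
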
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 88b2f2c..413b9ea 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -25,6 +25,58 @@
 namespace art {
 namespace arm {
 
+void Thumb2Assembler::Fixup::PrepareDependents(Thumb2Assembler* assembler) {
+  // For each Fixup, it is easy to find the Fixups it depends on: they are the following
+  // (or preceding) Fixups up to the target. However, for fixup adjustment we need the
+  // reverse lookup, i.e. which Fixups depend on a given Fixup. This function creates a
+  // compact representation of that relationship: all the dependents live in a single
+  // array and each Fixup references its range by start index and count, instead of
+  // keeping a per-fixup vector.
+
+  // Count the number of dependents of each Fixup.
+  const FixupId end_id = assembler->fixups_.size();
+  Fixup* fixups = assembler->fixups_.data();
+  for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) {
+    uint32_t target = fixups[fixup_id].target_;
+    if (target > fixups[fixup_id].location_) {
+      for (FixupId id = fixup_id + 1u; id != end_id && fixups[id].location_ < target; ++id) {
+        fixups[id].dependents_count_ += 1u;
+      }
+    } else {
+      for (FixupId id = fixup_id; id != 0u && fixups[id - 1u].location_ >= target; --id) {
+        fixups[id - 1u].dependents_count_ += 1u;
+      }
+    }
+  }
+  // Assign index ranges in fixup_dependents_ to individual fixups. Record the end of
+  // each range in dependents_start_; we decrement it below as we fill in fixup_dependents_.
+  uint32_t number_of_dependents = 0u;
+  for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) {
+    number_of_dependents += fixups[fixup_id].dependents_count_;
+    fixups[fixup_id].dependents_start_ = number_of_dependents;
+  }
+  if (number_of_dependents == 0u) {
+    return;
+  }
+  // Create and fill in the fixup_dependents_.
+  assembler->fixup_dependents_.reset(new FixupId[number_of_dependents]);
+  FixupId* dependents = assembler->fixup_dependents_.get();
+  for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) {
+    uint32_t target = fixups[fixup_id].target_;
+    if (target > fixups[fixup_id].location_) {
+      for (FixupId id = fixup_id + 1u; id != end_id && fixups[id].location_ < target; ++id) {
+        fixups[id].dependents_start_ -= 1u;
+        dependents[fixups[id].dependents_start_] = fixup_id;
+      }
+    } else {
+      for (FixupId id = fixup_id; id != 0u && fixups[id - 1u].location_ >= target; --id) {
+        fixups[id - 1u].dependents_start_ -= 1u;
+        dependents[fixups[id - 1u].dependents_start_] = fixup_id;
+      }
+    }
+  }
+}
+
 void Thumb2Assembler::BindLabel(Label* label, uint32_t bound_pc) {
   CHECK(!label->IsBound());
 
@@ -32,10 +84,6 @@
     FixupId fixup_id = label->Position();                     // The id for linked Fixup.
     Fixup* fixup = GetFixup(fixup_id);                        // Get the Fixup at this id.
     fixup->Resolve(bound_pc);                                 // Fixup can be resolved now.
-    // Add this fixup as a dependency of all later fixups.
-    for (FixupId id = fixup_id + 1u, end = fixups_.size(); id != end; ++id) {
-      GetFixup(id)->AddDependent(fixup_id);
-    }
     uint32_t fixup_location = fixup->GetLocation();
     uint16_t next = buffer_.Load<uint16_t>(fixup_location);   // Get next in chain.
     buffer_.Store<int16_t>(fixup_location, 0);
@@ -59,7 +107,7 @@
   uint32_t adjustment = fixup->AdjustSizeIfNeeded(*current_code_size);
   if (adjustment != 0u) {
     *current_code_size += adjustment;
-    for (FixupId dependent_id : fixup->Dependents()) {
+    for (FixupId dependent_id : fixup->Dependents(*this)) {
       Fixup* dependent = GetFixup(dependent_id);
       dependent->IncreaseAdjustment(adjustment);
       if (buffer_.Load<int16_t>(dependent->GetLocation()) == 0) {
@@ -71,6 +119,7 @@
 }
 
 uint32_t Thumb2Assembler::AdjustFixups() {
+  Fixup::PrepareDependents(this);
   uint32_t current_code_size = buffer_.Size();
   std::deque<FixupId> fixups_to_recalculate;
   if (kIsDebugBuild) {
@@ -101,7 +150,7 @@
   }
 
   // Adjust literal pool labels for padding.
-  DCHECK_EQ(current_code_size & 1u, 0u);
+  DCHECK_ALIGNED(current_code_size, 2);
   uint32_t literals_adjustment = current_code_size + (current_code_size & 2) - buffer_.Size();
   if (literals_adjustment != 0u) {
     for (Literal& literal : literals_) {
@@ -152,7 +201,7 @@
     // Load literal instructions (LDR, LDRD, VLDR) require 4-byte alignment.
     // We don't support byte and half-word literals.
     uint32_t code_size = buffer_.Size();
-    DCHECK_EQ(code_size & 1u, 0u);
+    DCHECK_ALIGNED(code_size, 2);
     if ((code_size & 2u) != 0u) {
       Emit16(0);
     }
@@ -168,7 +217,7 @@
 }
 
 inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) {
-  DCHECK_EQ(offset & 1, 0);
+  DCHECK_ALIGNED(offset, 2);
   int16_t encoding = B15 | B14;
   if (cond != AL) {
     DCHECK(IsInt<9>(offset));
@@ -181,7 +230,7 @@
 }
 
 inline int32_t Thumb2Assembler::BEncoding32(int32_t offset, Condition cond) {
-  DCHECK_EQ(offset & 1, 0);
+  DCHECK_ALIGNED(offset, 2);
   int32_t s = (offset >> 31) & 1;   // Sign bit.
   int32_t encoding = B31 | B30 | B29 | B28 | B15 |
       (s << 26) |                   // Sign bit goes to bit 26.
@@ -205,7 +254,7 @@
 
 inline int16_t Thumb2Assembler::CbxzEncoding16(Register rn, int32_t offset, Condition cond) {
   DCHECK(!IsHighRegister(rn));
-  DCHECK_EQ(offset & 1, 0);
+  DCHECK_ALIGNED(offset, 2);
   DCHECK(IsUint<7>(offset));
   DCHECK(cond == EQ || cond == NE);
   return B15 | B13 | B12 | B8 | (cond == NE ? B11 : 0) | static_cast<int32_t>(rn) |
@@ -250,7 +299,7 @@
 
 inline int16_t Thumb2Assembler::LdrLitEncoding16(Register rt, int32_t offset) {
   DCHECK(!IsHighRegister(rt));
-  DCHECK_EQ(offset & 3, 0);
+  DCHECK_ALIGNED(offset, 4);
   DCHECK(IsUint<10>(offset));
   return B14 | B11 | (static_cast<int32_t>(rt) << 8) | (offset >> 2);
 }
@@ -261,7 +310,7 @@
 }
 
 inline int32_t Thumb2Assembler::LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset) {
-  DCHECK_EQ(offset & 3, 0);
+  DCHECK_ALIGNED(offset, 4);
   CHECK(IsUint<10>(offset));
   return B31 | B30 | B29 | B27 |
       B24 /* P = 1 */ | B23 /* U = 1 */ | B22 | 0 /* W = 0 */ | B20 |
@@ -270,7 +319,7 @@
 }
 
 inline int32_t Thumb2Assembler::VldrsEncoding32(SRegister sd, Register rn, int32_t offset) {
-  DCHECK_EQ(offset & 3, 0);
+  DCHECK_ALIGNED(offset, 4);
   CHECK(IsUint<10>(offset));
   return B31 | B30 | B29 | B27 | B26 | B24 |
       B23 /* U = 1 */ | B20 | B11 | B9 |
@@ -281,7 +330,7 @@
 }
 
 inline int32_t Thumb2Assembler::VldrdEncoding32(DRegister dd, Register rn, int32_t offset) {
-  DCHECK_EQ(offset & 3, 0);
+  DCHECK_ALIGNED(offset, 4);
   CHECK(IsUint<10>(offset));
   return B31 | B30 | B29 | B27 | B26 | B24 |
       B23 /* U = 1 */ | B20 | B11 | B9 | B8 |
@@ -294,7 +343,7 @@
 inline int16_t Thumb2Assembler::LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset) {
   DCHECK(!IsHighRegister(rt));
   DCHECK(!IsHighRegister(rn));
-  DCHECK_EQ(offset & 3, 0);
+  DCHECK_ALIGNED(offset, 4);
   DCHECK(IsUint<7>(offset));
   return B14 | B13 | B11 |
       (static_cast<int32_t>(rn) << 3) | static_cast<int32_t>(rt) |
@@ -1423,7 +1472,7 @@
           thumb_opcode = 3U /* 0b11 */;
           opcode_shift = 12;
           CHECK_LT(immediate, (1u << 9));
-          CHECK_EQ((immediate & 3u /* 0b11 */), 0u);
+          CHECK_ALIGNED(immediate, 4);
 
           // Remove rd and rn from instruction by orring it with immed and clearing bits.
           rn = R0;
@@ -1437,7 +1486,7 @@
           thumb_opcode = 5U /* 0b101 */;
           opcode_shift = 11;
           CHECK_LT(immediate, (1u << 10));
-          CHECK_EQ((immediate & 3u /* 0b11 */), 0u);
+          CHECK_ALIGNED(immediate, 4);
 
           // Remove rn from instruction.
           rn = R0;
@@ -1474,7 +1523,7 @@
            thumb_opcode = 0x61 /* 0b1100001 */;
            opcode_shift = 7;
            CHECK_LT(immediate, (1u << 9));
-           CHECK_EQ((immediate & 3u /* 0b11 */), 0u);
+           CHECK_ALIGNED(immediate, 4);
 
            // Remove rd and rn from instruction by orring it with immed and clearing bits.
            rn = R0;
@@ -1652,7 +1701,7 @@
 
 inline size_t Thumb2Assembler::Fixup::LiteralPoolPaddingSize(uint32_t current_code_size) {
   // The code size must be a multiple of 2.
-  DCHECK_EQ(current_code_size & 1u, 0u);
+  DCHECK_ALIGNED(current_code_size, 2);
   // If it isn't a multiple of 4, we need to add a 2-byte padding before the literal pool.
   return current_code_size & 2;
 }
@@ -1697,7 +1746,7 @@
       // Load literal instructions round down the PC+4 to a multiple of 4, so if the PC
      // isn't a multiple of 4, we need to adjust. Since we already adjusted for the target
       // being aligned, current PC alignment can be inferred from diff.
-      DCHECK_EQ(diff & 1, 0);
+      DCHECK_ALIGNED(diff, 2);
       diff = diff + (diff & 2);
       DCHECK_GE(diff, 0);
       break;
@@ -2045,7 +2094,7 @@
       if (sp_relative) {
         // SP relative, 10 bit offset.
         CHECK_LT(offset, (1 << 10));
-        CHECK_EQ((offset & 3 /* 0b11 */), 0);
+        CHECK_ALIGNED(offset, 4);
         encoding |= rd << 8 | offset >> 2;
       } else {
         // No SP relative.  The offset is shifted right depending on
@@ -2058,12 +2107,12 @@
         } else if (half) {
           // 6 bit offset, shifted by 1.
           CHECK_LT(offset, (1 << 6));
-          CHECK_EQ((offset & 1 /* 0b1 */), 0);
+          CHECK_ALIGNED(offset, 2);
           offset >>= 1;
         } else {
           // 7 bit offset, shifted by 2.
           CHECK_LT(offset, (1 << 7));
-          CHECK_EQ((offset & 3 /* 0b11 */), 0);
+          CHECK_ALIGNED(offset, 4);
           offset >>= 2;
         }
         encoding |= rn << 3 | offset  << 6;
@@ -2220,17 +2269,7 @@
 
   if (label->IsBound()) {
     // The branch is to a bound label which means that it's a backwards branch.
-    // Record this branch as a dependency of all Fixups between the label and the branch.
     GetFixup(branch_id)->Resolve(label->Position());
-    for (FixupId fixup_id = branch_id; fixup_id != 0u; ) {
-      --fixup_id;
-      Fixup* fixup = GetFixup(fixup_id);
-      DCHECK_GE(label->Position(), 0);
-      if (fixup->GetLocation() < static_cast<uint32_t>(label->Position())) {
-        break;
-      }
-      fixup->AddDependent(branch_id);
-    }
     Emit16(0);
   } else {
     // Branch target is an unbound label. Add it to a singly-linked list maintained within
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 5e6969b..838554e 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -24,6 +24,7 @@
 #include "constants_arm.h"
 #include "utils/arm/managed_register_arm.h"
 #include "utils/arm/assembler_arm.h"
+#include "utils/array_ref.h"
 #include "offsets.h"
 
 namespace art {
@@ -37,6 +38,7 @@
         it_cond_index_(kNoItCondition),
         next_condition_(AL),
         fixups_(),
+        fixup_dependents_(),
         literals_(),
         last_position_adjustment_(0u),
         last_old_position_(0u),
@@ -507,12 +509,12 @@
       return adjustment_;
     }
 
-    const std::vector<FixupId>& Dependents() const {
-      return dependents_;
-    }
+    // Prepare the assembler->fixup_dependents_ and each Fixup's dependents_start_/count_.
+    static void PrepareDependents(Thumb2Assembler* assembler);
 
-    void AddDependent(FixupId dependent_id) {
-      dependents_.push_back(dependent_id);
+    ArrayRef<FixupId> Dependents(const Thumb2Assembler& assembler) const {
+      return ArrayRef<FixupId>(assembler.fixup_dependents_.get() + dependents_start_,
+                               dependents_count_);
     }
 
     // Resolve a branch when the target is known.
@@ -557,7 +559,8 @@
           location_(location),
           target_(kUnresolved),
           adjustment_(0u),
-          dependents_() {
+          dependents_count_(0u),
+          dependents_start_(0u) {
     }
     static size_t SizeInBytes(Size size);
 
@@ -584,7 +587,10 @@
     uint32_t location_;     // Offset into assembler buffer in bytes.
     uint32_t target_;       // Offset into assembler buffer in bytes.
     uint32_t adjustment_;   // The number of extra bytes inserted between location_ and target_.
-    std::vector<FixupId> dependents_;  // Fixups that require adjustment when current size changes.
+    // Fixups that require adjustment when this Fixup's size changes are stored in a
+    // single array in the assembler; we keep only the start index and count here.
+    uint32_t dependents_count_;
+    uint32_t dependents_start_;
   };
 
   // Emit a single 32 or 16 bit data processing instruction.
@@ -760,6 +766,7 @@
   static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset);
 
   std::vector<Fixup> fixups_;
+  std::unique_ptr<FixupId[]> fixup_dependents_;
 
   // Use std::deque<> for literal labels to allow insertions at the end
   // without invalidating pointers and references to existing elements.
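
PrepareDependents builds a compact adjacency (CSR-style) layout in two passes: count the dependents of each Fixup, turn the counts into range end positions with a running sum, then fill the shared array while decrementing each start index. A generic sketch of the same construction over an explicit edge list (names are illustrative, not the Fixup API):

    #include <cstdint>
    #include <memory>
    #include <utility>
    #include <vector>

    struct Node {
      uint32_t dependents_count = 0u;
      uint32_t dependents_start = 0u;
    };

    // edges[i] = {dependent, dependee}: dependent needs adjustment when dependee grows.
    std::unique_ptr<uint32_t[]> BuildDependents(
        std::vector<Node>* nodes,
        const std::vector<std::pair<uint32_t, uint32_t>>& edges) {
      // Pass 1: count dependents per node.
      for (const auto& edge : edges) {
        (*nodes)[edge.second].dependents_count += 1u;
      }
      // Running sum: record the END of each node's range; decremented below.
      uint32_t total = 0u;
      for (Node& node : *nodes) {
        total += node.dependents_count;
        node.dependents_start = total;
      }
      // Pass 2: fill the single shared array back-to-front within each range.
      std::unique_ptr<uint32_t[]> dependents(new uint32_t[total]);
      for (const auto& edge : edges) {
        Node* node = &(*nodes)[edge.second];
        node->dependents_start -= 1u;
        dependents[node->dependents_start] = edge.first;
      }
      return dependents;
    }
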
diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h
index ff5a77c..303e0d5 100644
--- a/compiler/utils/array_ref.h
+++ b/compiler/utils/array_ref.h
@@ -62,14 +62,14 @@
   }
 
   template <size_t size>
-  constexpr ArrayRef(T (&array)[size])
+  explicit constexpr ArrayRef(T (&array)[size])
     : array_(array), size_(size) {
   }
 
   template <typename U, size_t size>
-  constexpr ArrayRef(U (&array)[size],
-                     typename std::enable_if<std::is_same<T, const U>::value, tag>::type
-                         t ATTRIBUTE_UNUSED = tag())
+  explicit constexpr ArrayRef(U (&array)[size],
+                              typename std::enable_if<std::is_same<T, const U>::value, tag>::type
+                                  t ATTRIBUTE_UNUSED = tag())
     : array_(array), size_(size) {
   }
 
@@ -83,9 +83,9 @@
   }
 
   template <typename U, typename Alloc>
-  ArrayRef(const std::vector<U, Alloc>& v,
-           typename std::enable_if<std::is_same<T, const U>::value, tag>::type
-               t ATTRIBUTE_UNUSED = tag())
+  explicit ArrayRef(const std::vector<U, Alloc>& v,
+                    typename std::enable_if<std::is_same<T, const U>::value, tag>::type
+                        t ATTRIBUTE_UNUSED = tag())
       : array_(v.data()), size_(v.size()) {
   }
 
diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk
index d127d35..71e9a28 100644
--- a/dalvikvm/Android.mk
+++ b/dalvikvm/Android.mk
@@ -66,16 +66,22 @@
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.common.mk
 LOCAL_IS_HOST_MODULE := true
 LOCAL_MULTILIB := both
+ifdef ART_MULTILIB_OVERRIDE_host
+  LOCAL_MULTILIB := $(ART_MULTILIB_OVERRIDE_host)
+endif
+ifeq ($(LOCAL_MULTILIB),both)
 LOCAL_MODULE_STEM_32 := dalvikvm32
 LOCAL_MODULE_STEM_64 := dalvikvm64
+endif
 LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE)
 include $(BUILD_HOST_EXECUTABLE)
-
 # Create symlink for the primary version target.
+ifeq ($(LOCAL_MULTILIB),both)
 include  $(BUILD_SYSTEM)/executable_prefer_symlink.mk
 
-ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
 ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)$(ART_PHONY_TEST_HOST_SUFFIX)
 ifdef 2ND_ART_PHONY_TEST_HOST_SUFFIX
   ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)$(2ND_ART_PHONY_TEST_HOST_SUFFIX)
 endif
+endif
+ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
diff --git a/dexdump/Android.mk b/dexdump/Android.mk
index c6b4d47..a208ccf 100755
--- a/dexdump/Android.mk
+++ b/dexdump/Android.mk
@@ -50,5 +50,5 @@
 LOCAL_CFLAGS += -Wall
 LOCAL_SHARED_LIBRARIES += $(dexdump_libraries)
 LOCAL_MODULE := dexdump2
-LOCAL_MODULE_TAGS := optional
+LOCAL_MULTILIB := $(ART_MULTILIB_OVERRIDE_host)
 include $(BUILD_HOST_EXECUTABLE)
diff --git a/dexlist/Android.mk b/dexlist/Android.mk
index 988fe03..9fbd847 100755
--- a/dexlist/Android.mk
+++ b/dexlist/Android.mk
@@ -50,5 +50,5 @@
 LOCAL_CFLAGS += -Wall
 LOCAL_SHARED_LIBRARIES += $(dexlist_libraries)
 LOCAL_MODULE := dexlist2
-LOCAL_MODULE_TAGS := optional
+LOCAL_MULTILIB := $(ART_MULTILIB_OVERRIDE_host)
 include $(BUILD_HOST_EXECUTABLE)
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 82452ba..9325454 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1036,6 +1036,11 @@
         ScopedIndentation indent1(vios);
         DumpCodeInfo(vios, code_info, oat_method, *code_item);
       }
+    } else if (IsMethodGeneratedByDexToDexCompiler(oat_method, code_item)) {
+      // We don't encode the size in the table, so just emit that we have quickened
+      // information.
+      ScopedIndentation indent(vios);
+      vios->Stream() << "quickened data\n";
     } else {
       // Otherwise, display the vmap table.
       const uint8_t* raw_table = oat_method.GetVmapTable();
@@ -1345,7 +1350,21 @@
     // If the native GC map is null and the Dex `code_item` is not
     // null, then this method has been compiled with the optimizing
     // compiler.
-    return oat_method.GetGcMap() == nullptr && code_item != nullptr;
+    return oat_method.GetQuickCode() != nullptr &&
+           oat_method.GetGcMap() == nullptr &&
+           code_item != nullptr;
+  }
+
+  // Has `oat_method` -- corresponding to the Dex `code_item` -- been compiled by
+  // the dex-to-dex compiler?
+  static bool IsMethodGeneratedByDexToDexCompiler(const OatFile::OatMethod& oat_method,
+                                                  const DexFile::CodeItem* code_item) {
+    // If the quick code is null, the Dex `code_item` is not null, and the vmap
+    // table is not null, then this method has been compiled with the
+    // dex-to-dex compiler.
+    return oat_method.GetQuickCode() == nullptr &&
+           oat_method.GetVmapTable() != nullptr &&
+           code_item != nullptr;
   }
 
   void DumpDexRegisterMapAtOffset(VariableIndentationOutputStream* vios,
@@ -1931,9 +1950,12 @@
       }
       state->stats_.managed_code_bytes_ignoring_deduplication += quick_oat_code_size;
 
+      uint32_t method_access_flags = method->GetAccessFlags();
+
       indent_os << StringPrintf("OAT CODE: %p-%p\n", quick_oat_code_begin, quick_oat_code_end);
-      indent_os << StringPrintf("SIZE: Dex Instructions=%zd GC=%zd Mapping=%zd\n",
-                                dex_instruction_bytes, gc_map_bytes, pc_mapping_table_bytes);
+      indent_os << StringPrintf("SIZE: Dex Instructions=%zd GC=%zd Mapping=%zd AccessFlags=0x%x\n",
+                                dex_instruction_bytes, gc_map_bytes, pc_mapping_table_bytes,
+                                method_access_flags);
 
       size_t total_size = dex_instruction_bytes + gc_map_bytes + pc_mapping_table_bytes +
           vmap_table_bytes + quick_oat_code_size + ArtMethod::ObjectSize(image_pointer_size);
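
Together the two predicates partition methods by the artifacts they leave in the oat file: optimizing-compiled methods have quick code but no GC map, while dex-to-dex (quickened) methods have a vmap table but no quick code. A condensed sketch of that classification (MethodInfo and its fields are hypothetical placeholders for the OatMethod/CodeItem queries):

    enum class CompilerKind { kOptimizing, kDexToDex, kOther };

    struct MethodInfo {
      bool has_quick_code;
      bool has_gc_map;
      bool has_vmap_table;
      bool has_code_item;
    };

    CompilerKind Classify(const MethodInfo& m) {
      if (m.has_code_item && m.has_quick_code && !m.has_gc_map) {
        return CompilerKind::kOptimizing;  // IsMethodGeneratedByOptimizingCompiler
      }
      if (m.has_code_item && !m.has_quick_code && m.has_vmap_table) {
        return CompilerKind::kDexToDex;    // quickened info only, no native code
      }
      return CompilerKind::kOther;
    }
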
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 7f103a4..fe79e72 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -98,6 +98,7 @@
   jit/jit.cc \
   jit/jit_code_cache.cc \
   jit/jit_instrumentation.cc \
+  lambda/box_table.cc \
   jni_internal.cc \
   jobject_comparator.cc \
   linear_alloc.cc \
@@ -311,13 +312,14 @@
   dex_instruction.h \
   dex_instruction_utils.h \
   gc_root.h \
-  gc/allocator/rosalloc.h \
-  gc/collector/gc_type.h \
   gc/allocator_type.h \
+  gc/allocator/rosalloc.h \
   gc/collector_type.h \
+  gc/collector/gc_type.h \
+  gc/heap.h \
   gc/space/region_space.h \
   gc/space/space.h \
-  gc/heap.h \
+  gc/weak_root_state.h \
   image.h \
   instrumentation.h \
   indirect_reference_table.h \
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index a7826a7..2000110 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -24,10 +24,9 @@
     .extern artDeliverPendingException
 
     /*
-     * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Macro to spill the GPRs.
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME rTemp1, rTemp2
+.macro SPILL_ALL_CALLEE_SAVE_GPRS
     push {r4-r11, lr}                             @ 9 words (36 bytes) of callee saves.
     .cfi_adjust_cfa_offset 36
     .cfi_rel_offset r4, 0
@@ -39,6 +38,14 @@
     .cfi_rel_offset r10, 24
     .cfi_rel_offset r11, 28
     .cfi_rel_offset lr, 32
+.endm
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     */
+.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME rTemp1, rTemp2
+    SPILL_ALL_CALLEE_SAVE_GPRS                    @ 9 words (36 bytes) of callee saves.
     vpush {s16-s31}                               @ 16 words (64 bytes) of floats.
     .cfi_adjust_cfa_offset 64
     sub sp, #12                                   @ 3 words of space, bottom word will hold Method*
@@ -325,23 +332,25 @@
      * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
      * pointing back to the original caller.
      */
-.macro INVOKE_TRAMPOLINE c_name, cxx_name
+.macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-ENTRY \c_name
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case allocation triggers GC
     mov    r2, r9                         @ pass Thread::Current
     mov    r3, sp
-    bl     \cxx_name                      @ (method_idx, this, caller, Thread*, SP)
+    bl     \cxx_name                      @ (method_idx, this, Thread*, SP)
     mov    r12, r1                        @ save Method*->code_
     RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     cbz    r0, 1f                         @ did we find the target? if not go to exception delivery
     bx     r12                            @ tail call to target
 1:
     DELIVER_PENDING_EXCEPTION
+.endm
+.macro INVOKE_TRAMPOLINE c_name, cxx_name
+ENTRY \c_name
+    INVOKE_TRAMPOLINE_BODY \cxx_name
 END \c_name
 .endm
 
-INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
 INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
 
 INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
@@ -378,17 +387,7 @@
      *  +-------------------------+
      */
 ENTRY art_quick_invoke_stub_internal
-    push   {r4, r5, r6, r7, r8, r9, r10, r11, lr}               @ spill regs
-    .cfi_adjust_cfa_offset 16
-    .cfi_rel_offset r4, 0
-    .cfi_rel_offset r5, 4
-    .cfi_rel_offset r6, 8
-    .cfi_rel_offset r7, 12
-    .cfi_rel_offset r8, 16
-    .cfi_rel_offset r9, 20
-    .cfi_rel_offset r10, 24
-    .cfi_rel_offset r11, 28
-    .cfi_rel_offset lr, 32
+    SPILL_ALL_CALLEE_SAVE_GPRS             @ spill regs (9)
     mov    r11, sp                         @ save the stack pointer
     .cfi_def_cfa_register r11
 
@@ -894,7 +893,7 @@
      */
 ENTRY art_quick_imt_conflict_trampoline
     mov    r0, r12
-    b art_quick_invoke_interface_trampoline
+    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
     .extern artQuickResolutionTrampoline
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 614936b..6d9b44a 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -450,9 +450,8 @@
      *
      * Clobbers xIP0.
      */
-.macro INVOKE_TRAMPOLINE c_name, cxx_name
+.macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-ENTRY \c_name
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
     // Helper signature is always
     // (method_idx, *this_object, *caller_method, *self, sp)
@@ -466,10 +465,13 @@
     br     xIP0                           // tail call to target
 1:
     DELIVER_PENDING_EXCEPTION
+.endm
+.macro INVOKE_TRAMPOLINE c_name, cxx_name
+ENTRY \c_name
+    INVOKE_TRAMPOLINE_BODY \cxx_name
 END \c_name
 .endm
 
-INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
 INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
 
 INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
@@ -1429,9 +1431,10 @@
      * Called to resolve an imt conflict. xIP1 is a hidden argument that holds the target method's
      * dex method index.
      */
+    .extern artInvokeInterfaceTrampoline
 ENTRY art_quick_imt_conflict_trampoline
     mov    x0, xIP1
-    b art_quick_invoke_interface_trampoline
+    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
 ENTRY art_quick_resolution_trampoline
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index cc1de43..2819f92 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -459,9 +459,8 @@
      * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
      * pointing back to the original caller.
      */
-.macro INVOKE_TRAMPOLINE c_name, cxx_name
+.macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-ENTRY \c_name
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  # save callee saves in case allocation triggers GC
     move  $a2, rSELF                       # pass Thread::Current
     jal   \cxx_name                        # (method_idx, this, Thread*, $sp)
@@ -474,10 +473,13 @@
     nop
 1:
     DELIVER_PENDING_EXCEPTION
+.endm
+.macro INVOKE_TRAMPOLINE c_name, cxx_name
+ENTRY \c_name
+    INVOKE_TRAMPOLINE_BODY \cxx_name
 END \c_name
 .endm
 
-INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
 INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
 
 INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
@@ -1103,9 +1105,8 @@
      * dex method index.
      */
 ENTRY art_quick_imt_conflict_trampoline
-    la      $t9, art_quick_invoke_interface_trampoline
-    jalr    $zero, $t9
     move    $a0, $t0
+    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
     .extern artQuickResolutionTrampoline
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 37c6c5b..abca70b 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -525,9 +525,8 @@
      * On success this wrapper will restore arguments and *jump* to the target, leaving the ra
      * pointing back to the original caller.
      */
-.macro INVOKE_TRAMPOLINE c_name, cxx_name
+.macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-ENTRY \c_name
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  # save callee saves in case allocation triggers GC
     move  $a2, rSELF                       # pass Thread::Current
     jal   \cxx_name                        # (method_idx, this, Thread*, $sp)
@@ -541,10 +540,13 @@
     nop
 1:
     DELIVER_PENDING_EXCEPTION
+.endm
+.macro INVOKE_TRAMPOLINE c_name, cxx_name
+ENTRY \c_name
+    INVOKE_TRAMPOLINE_BODY \cxx_name
 END \c_name
 .endm
 
-INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
 INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
 
 INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
@@ -1369,10 +1371,8 @@
      * dex method index.
      */
 ENTRY art_quick_imt_conflict_trampoline
-    dla     $t9, art_quick_invoke_interface_trampoline
-    .cpreturn
-    jalr    $zero, $t9
     move    $a0, $t0
+    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
     .extern artQuickResolutionTrampoline
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index 2159f0e..77b8e87 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -19,61 +19,53 @@
 
 #include "asm_support_x86.h"
 
-#if defined(__APPLE__) || (defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5))
-    // Clang's as(1) doesn't let you name macro parameters prior to 3.5.
-    #define MACRO0(macro_name) .macro macro_name
-    #define MACRO1(macro_name, macro_arg1) .macro macro_name
-    #define MACRO2(macro_name, macro_arg1, macro_args2) .macro macro_name
-    #define MACRO3(macro_name, macro_arg1, macro_args2, macro_args3) .macro macro_name
-    #define MACRO4(macro_name, macro_arg1, macro_arg2, macro_arg3, macro_arg4) .macro macro_name
-    #define MACRO5(macro_name, macro_arg1, macro_arg2, macro_arg3, macro_arg4, macro_arg5) .macro macro_name
-    #define END_MACRO .endmacro
+// Regular gas(1) and current clang/llvm assemblers support named macro parameters.
+#define MACRO0(macro_name) .macro macro_name
+#define MACRO1(macro_name, macro_arg1) .macro macro_name macro_arg1
+#define MACRO2(macro_name, macro_arg1, macro_arg2) .macro macro_name macro_arg1, macro_arg2
+#define MACRO3(macro_name, macro_arg1, macro_arg2, macro_arg3) .macro macro_name macro_arg1, macro_arg2, macro_arg3
+#define MACRO4(macro_name, macro_arg1, macro_arg2, macro_arg3, macro_arg4) .macro macro_name macro_arg1, macro_arg2, macro_arg3, macro_arg4
+#define MACRO5(macro_name, macro_arg1, macro_arg2, macro_arg3, macro_arg4, macro_arg5) .macro macro_name macro_arg1, macro_arg2, macro_arg3, macro_arg4, macro_arg5
+#define END_MACRO .endm
 
-    // Clang's as(1) uses $0, $1, and so on for macro arguments.
-    #define RAW_VAR(name,index) $index
-    #define VAR(name,index) SYMBOL($index)
-    #define PLT_VAR(name, index) SYMBOL($index)
-    #define REG_VAR(name,index) %$index
-    #define CALL_MACRO(name,index) $index
-
-    //  The use of $x for arguments mean that literals need to be represented with $$x in macros.
-    #define LITERAL(value) $value
-    #define MACRO_LITERAL(value) $$value
+#if defined(__clang__)
+    // Clang/llvm does not support .altmacro. However, the clang/llvm preprocessor doesn't
+    // separate the backslash and parameter by a space. Everything just works.
+    #define RAW_VAR(name) \name
+    #define VAR(name) \name
+    #define CALLVAR(name) SYMBOL(\name)
+    #define PLT_VAR(name) \name@PLT
+    #define REG_VAR(name) %\name
+    #define CALL_MACRO(name) \name
 #else
-    // Regular gas(1) lets you name macro parameters.
-    #define MACRO0(macro_name) .macro macro_name
-    #define MACRO1(macro_name, macro_arg1) .macro macro_name macro_arg1
-    #define MACRO2(macro_name, macro_arg1, macro_arg2) .macro macro_name macro_arg1, macro_arg2
-    #define MACRO3(macro_name, macro_arg1, macro_arg2, macro_arg3) .macro macro_name macro_arg1, macro_arg2, macro_arg3
-    #define MACRO4(macro_name, macro_arg1, macro_arg2, macro_arg3, macro_arg4) .macro macro_name macro_arg1, macro_arg2, macro_arg3, macro_arg4
-    #define MACRO5(macro_name, macro_arg1, macro_arg2, macro_arg3, macro_arg4, macro_arg5) .macro macro_name macro_arg1, macro_arg2, macro_arg3, macro_arg4, macro_arg5
-    #define END_MACRO .endm
-
     // Regular gas(1) uses \argument_name for macro arguments.
     // We need to turn on alternate macro syntax so we can use & instead, or the preprocessor
     // will screw us by inserting a space between the \ and the name. Even in this mode there's
     // no special meaning to $, so literals are still just $x. The use of altmacro means % is a
-    // special character meaning care needs to be taken when passing registers as macro arguments.
+    // special character meaning care needs to be taken when passing registers as macro
+    // arguments.
     .altmacro
-    #define RAW_VAR(name,index) name&
-    #define VAR(name,index) name&
-    #define PLT_VAR(name, index) name&@PLT
-    #define REG_VAR(name,index) %name
-    #define CALL_MACRO(name,index) name&
+    #define RAW_VAR(name) name&
+    #define VAR(name) name&
+    #define CALLVAR(name) SYMBOL(name&)
+    #define PLT_VAR(name) name&@PLT
+    #define REG_VAR(name) %name
+    #define CALL_MACRO(name) name&
+#endif
 
-    #define LITERAL(value) $value
+#define LITERAL(value) $value
+#if defined(__APPLE__)
+    #define MACRO_LITERAL(value) $(value)
+#else
     #define MACRO_LITERAL(value) $value
 #endif
 
 #if defined(__APPLE__)
-    #define FUNCTION_TYPE(name,index)
-    #define SIZE(name,index)
-#elif defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)
-    #define FUNCTION_TYPE(name,index) .type $index, @function
-    #define SIZE(name,index) .size $index, .-$index
+    #define FUNCTION_TYPE(name)
+    #define SIZE(name)
 #else
-    #define FUNCTION_TYPE(name,index) .type name&, @function
-    #define SIZE(name,index) .size name, .-name
+    #define FUNCTION_TYPE(name) .type name, @function
+    #define SIZE(name) .size name, .-name
 #endif
 
     // CFI support.
@@ -100,16 +92,10 @@
     #define CFI_REMEMBER_STATE
 #endif
 
-    // Symbols.
+    // Symbols. On a Mac, we need a leading underscore.
 #if !defined(__APPLE__)
     #define SYMBOL(name) name
-    #if defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)
-        // TODO: Disabled for old clang 3.3, this leads to text relocations and there should be a
-        // better fix.
-        #define PLT_SYMBOL(name) name // ## @PLT
-    #else
-        #define PLT_SYMBOL(name) name ## @PLT
-    #endif
+    #define PLT_SYMBOL(name) name ## @PLT
 #else
     // Mac OS' symbols have an _ prefix.
     #define SYMBOL(name) _ ## name
@@ -129,11 +115,11 @@
 END_MACRO
 
 MACRO1(DEFINE_FUNCTION, c_name)
-    FUNCTION_TYPE(\c_name, 0)
-    ASM_HIDDEN VAR(c_name, 0)
-    .globl VAR(c_name, 0)
+    FUNCTION_TYPE(SYMBOL(\c_name))
+    ASM_HIDDEN CALLVAR(c_name)
+    .globl CALLVAR(c_name)
     ALIGN_FUNCTION_ENTRY
-VAR(c_name, 0):
+CALLVAR(c_name):
     CFI_STARTPROC
     // Ensure we get a sane starting CFA.
     CFI_DEF_CFA(esp, 4)
@@ -141,36 +127,38 @@
 
 MACRO1(END_FUNCTION, c_name)
     CFI_ENDPROC
-    SIZE(\c_name, 0)
+    SIZE(SYMBOL(\c_name))
 END_MACRO
 
 MACRO1(PUSH, reg)
-    pushl REG_VAR(reg, 0)
+    pushl REG_VAR(reg)
     CFI_ADJUST_CFA_OFFSET(4)
-    CFI_REL_OFFSET(REG_VAR(reg, 0), 0)
+    CFI_REL_OFFSET(REG_VAR(reg), 0)
 END_MACRO
 
 MACRO1(POP, reg)
-    popl REG_VAR(reg,0)
+    popl REG_VAR(reg)
     CFI_ADJUST_CFA_OFFSET(-4)
-    CFI_RESTORE(REG_VAR(reg,0))
+    CFI_RESTORE(REG_VAR(reg))
 END_MACRO
 
+#define UNREACHABLE int3
+
 MACRO1(UNIMPLEMENTED,name)
-    FUNCTION_TYPE(\name, 0)
-    .globl VAR(name, 0)
+    FUNCTION_TYPE(\name)
+    .globl VAR(name)
     ALIGN_FUNCTION_ENTRY
-VAR(name, 0):
+VAR(name):
     CFI_STARTPROC
-    int3
-    int3
+    UNREACHABLE
+    UNREACHABLE
     CFI_ENDPROC
-    SIZE(\name, 0)
+    SIZE(\name)
 END_MACRO
 
 MACRO1(SETUP_GOT_NOSAVE, got_reg)
 #ifndef __APPLE__
-    .ifc RAW_VAR(got_reg, 0), ebx
+    .ifc VAR(got_reg), ebx
       call __x86.get_pc_thunk.bx
       addl $_GLOBAL_OFFSET_TABLE_, %ebx
     .else
@@ -182,15 +170,16 @@
 // Macros to poison (negate) the reference for heap poisoning.
 MACRO1(POISON_HEAP_REF, rRef)
 #ifdef USE_HEAP_POISONING
-    neg REG_VAR(rRef, 0)
+    neg REG_VAR(rRef)
 #endif  // USE_HEAP_POISONING
 END_MACRO
 
 // Macros to unpoison (negate) the reference for heap poisoning.
 MACRO1(UNPOISON_HEAP_REF, rRef)
 #ifdef USE_HEAP_POISONING
-    neg REG_VAR(rRef, 0)
+    neg REG_VAR(rRef)
 #endif  // USE_HEAP_POISONING
 END_MACRO
 
+
 #endif  // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_S_
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index f6c7649..ebfb3fa 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -26,15 +26,15 @@
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
     PUSH esi
     PUSH ebp
-    subl  MACRO_LITERAL(12), %esp  // Grow stack by 3 words.
+    subl MACRO_LITERAL(12), %esp  // Grow stack by 3 words.
     CFI_ADJUST_CFA_OFFSET(12)
-    SETUP_GOT_NOSAVE RAW_VAR(got_reg, 0)
+    SETUP_GOT_NOSAVE RAW_VAR(got_reg)
     // Load Runtime::instance_ from GOT.
-    movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg, 0)), REG_VAR(temp_reg, 1)
-    movl (REG_VAR(temp_reg, 1)), REG_VAR(temp_reg, 1)
+    movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
+    movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
     THIS_LOAD_REQUIRES_READ_BARRIER
-    pushl RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg, 1))
+    pushl RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
@@ -53,15 +53,15 @@
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
     PUSH esi
     PUSH ebp
-    subl  MACRO_LITERAL(12), %esp  // Grow stack by 3 words.
+    subl MACRO_LITERAL(12), %esp  // Grow stack by 3 words.
     CFI_ADJUST_CFA_OFFSET(12)
-    SETUP_GOT_NOSAVE VAR(got_reg, 0)
+    SETUP_GOT_NOSAVE RAW_VAR(got_reg)
     // Load Runtime::instance_ from GOT.
-    movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg, 0)), REG_VAR(temp_reg, 1)
-    movl (REG_VAR(temp_reg, 1)), REG_VAR(temp_reg, 1)
+    movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
+    movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
     THIS_LOAD_REQUIRES_READ_BARRIER
-    pushl RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg, 1))
+    pushl RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
@@ -101,13 +101,13 @@
     movsd %xmm2, 16(%esp)
     movsd %xmm3, 24(%esp)
 
-    SETUP_GOT_NOSAVE VAR(got_reg, 0)
+    SETUP_GOT_NOSAVE RAW_VAR(got_reg)
     // Load Runtime::instance_ from GOT.
-    movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg, 0)), REG_VAR(temp_reg, 1)
-    movl (REG_VAR(temp_reg, 1)), REG_VAR(temp_reg, 1)
+    movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
+    movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
     THIS_LOAD_REQUIRES_READ_BARRIER
-    pushl RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg, 1))
+    pushl RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
@@ -157,10 +157,10 @@
     addl MACRO_LITERAL(36), %esp  // Remove FPRs and EAX.
     CFI_ADJUST_CFA_OFFSET(-36)
 
-    POP ecx  // Restore args except eax
+    POP ecx                       // Restore args except eax
     POP edx
     POP ebx
-    POP ebp  // Restore callee saves
+    POP ebp                       // Restore callee saves
     POP esi
     POP edi
 END_MACRO
@@ -196,54 +196,54 @@
 MACRO0(DELIVER_PENDING_EXCEPTION)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save callee saves for throw
     // Outgoing argument set up
-    subl  MACRO_LITERAL(12), %esp              // Alignment padding
+    subl MACRO_LITERAL(12), %esp              // Alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
-    int3                                       // unreached
+    UNREACHABLE
 END_MACRO
 
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
-    DEFINE_FUNCTION RAW_VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  ebx, ebx  // save all registers as basis for long jump context
     // Outgoing argument set up
-    subl  MACRO_LITERAL(12), %esp  // alignment padding
+    subl MACRO_LITERAL(12), %esp                // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET                // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    call VAR(cxx_name, 1)         // cxx_name(Thread*)
-    int3                          // unreached
-    END_FUNCTION RAW_VAR(c_name, 0)
+    call CALLVAR(cxx_name)                      // cxx_name(Thread*)
+    UNREACHABLE
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
-    DEFINE_FUNCTION RAW_VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save all registers as basis for long jump context
     mov %esp, %ecx
     // Outgoing argument set up
-    subl  MACRO_LITERAL(8), %esp  // alignment padding
+    subl MACRO_LITERAL(8), %esp               // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)         // cxx_name(arg1, Thread*)
-    int3                          // unreached
-    END_FUNCTION RAW_VAR(c_name, 0)
+    PUSH eax                                   // pass arg1
+    call CALLVAR(cxx_name)                     // cxx_name(arg1, Thread*)
+    UNREACHABLE
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
-    DEFINE_FUNCTION RAW_VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save all registers as basis for long jump context
     // Outgoing argument set up
-    PUSH eax                      // alignment padding
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    PUSH eax                                   // alignment padding
+    pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH ecx                      // pass arg2
-    PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, Thread*)
-    int3                          // unreached
-    END_FUNCTION RAW_VAR(c_name, 0)
+    PUSH ecx                                   // pass arg2
+    PUSH eax                                   // pass arg1
+    call CALLVAR(cxx_name)                     // cxx_name(arg1, arg2, Thread*)
+    UNREACHABLE
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
     /*
@@ -293,8 +293,7 @@
      * On success this wrapper will restore arguments and *jump* to the target, leaving the
      * return address pointing back to the original caller.
      */
-MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
-    DEFINE_FUNCTION RAW_VAR(c_name, 0)
+MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, ebx
     movl %esp, %edx  // remember SP
 
@@ -304,7 +303,7 @@
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, arg3, Thread*, SP)
+    call CALLVAR(cxx_name)        // cxx_name(arg1, arg2, Thread*, SP)
     movl %edx, %edi               // save code pointer in EDI
     addl MACRO_LITERAL(20), %esp  // Pop arguments skip eax
     CFI_ADJUST_CFA_OFFSET(-20)
@@ -334,10 +333,13 @@
     addl MACRO_LITERAL(4), %esp   // Pop code pointer off stack
     CFI_ADJUST_CFA_OFFSET(-4)
     DELIVER_PENDING_EXCEPTION
-    END_FUNCTION RAW_VAR(c_name, 0)
+END_MACRO
+MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
+    DEFINE_FUNCTION VAR(c_name)
+    INVOKE_TRAMPOLINE_BODY RAW_VAR(cxx_name)
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
-INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
 INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
 
 INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
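The refactoring above splits INVOKE_TRAMPOLINE into a reusable INVOKE_TRAMPOLINE_BODY plus a thin function wrapper, and drops the art_quick_invoke_interface_trampoline entry point entirely; its one remaining user, art_quick_imt_conflict_trampoline, now expands the body inline (see the hunk further down in this file) instead of jumping to an exported symbol. A sketch of the wrapper/body pattern using plain gas named parameters; all names here are made up:

    .text
.macro TRAMPOLINE_BODY cxx_name
    call \cxx_name                    // shared body: set up args, call helper
    ret
.endm
.macro TRAMPOLINE c_name, cxx_name
    .globl \c_name
\c_name:
    TRAMPOLINE_BODY \cxx_name         // body wrapped in a named entry point
.endm
TRAMPOLINE my_trampoline, my_helper   // hypothetical function/helper pair
inline_site:
    TRAMPOLINE_BODY my_helper         // same body expanded inline, no jmp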
@@ -352,27 +354,27 @@
      */
 MACRO5(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, shorty, arg_array, temp_char, finished)
 1: // LOOP
-    movb (REG_VAR(shorty, 1)), REG_VAR(temp_char, 3)  // temp_char := *shorty
-    addl MACRO_LITERAL(1), REG_VAR(shorty, 1)         // shorty++
-    cmpb MACRO_LITERAL(0), REG_VAR(temp_char, 3)      // if (temp_char == '\0')
-    je RAW_VAR(finished, 4)                           //   goto finished
-    cmpb MACRO_LITERAL(68), REG_VAR(temp_char, 3)     // if (temp_char == 'D')
-    je 2f                                             //   goto FOUND_DOUBLE
-    cmpb MACRO_LITERAL(70), REG_VAR(temp_char, 3)     // if (temp_char == 'F')
-    je 3f                                             //   goto FOUND_FLOAT
-    addl MACRO_LITERAL(4), REG_VAR(arg_array, 2)      // arg_array++
+    movb (REG_VAR(shorty)), REG_VAR(temp_char)     // temp_char := *shorty
+    addl MACRO_LITERAL(1), REG_VAR(shorty)         // shorty++
+    cmpb MACRO_LITERAL(0), REG_VAR(temp_char)      // if (temp_char == '\0')
+    je VAR(finished)                               //   goto finished
+    cmpb MACRO_LITERAL(68), REG_VAR(temp_char)     // if (temp_char == 'D')
+    je 2f                                          //   goto FOUND_DOUBLE
+    cmpb MACRO_LITERAL(70), REG_VAR(temp_char)     // if (temp_char == 'F')
+    je 3f                                          //   goto FOUND_FLOAT
+    addl MACRO_LITERAL(4), REG_VAR(arg_array)      // arg_array++
     //  Handle extra space in arg array taken by a long.
-    cmpb MACRO_LITERAL(74), REG_VAR(temp_char, 3)     // if (temp_char != 'J')
-    jne 1b                                            //   goto LOOP
-    addl MACRO_LITERAL(4), REG_VAR(arg_array, 2)      // arg_array++
-    jmp 1b                                            // goto LOOP
+    cmpb MACRO_LITERAL(74), REG_VAR(temp_char)     // if (temp_char != 'J')
+    jne 1b                                         //   goto LOOP
+    addl MACRO_LITERAL(4), REG_VAR(arg_array)      // arg_array++
+    jmp 1b                                         // goto LOOP
 2:  // FOUND_DOUBLE
-    movsd (REG_VAR(arg_array, 2)), REG_VAR(xmm_reg, 0)
-    addl MACRO_LITERAL(8), REG_VAR(arg_array, 2)      // arg_array+=2
+    movsd (REG_VAR(arg_array)), REG_VAR(xmm_reg)
+    addl MACRO_LITERAL(8), REG_VAR(arg_array)      // arg_array+=2
     jmp 4f
 3:  // FOUND_FLOAT
-    movss (REG_VAR(arg_array, 2)), REG_VAR(xmm_reg, 0)
-    addl MACRO_LITERAL(4), REG_VAR(arg_array, 2)      // arg_array++
+    movss (REG_VAR(arg_array)), REG_VAR(xmm_reg)
+    addl MACRO_LITERAL(4), REG_VAR(arg_array)      // arg_array++
 4:
 END_MACRO
 
@@ -383,21 +385,21 @@
      */
 MACRO4(SKIP_OVER_FLOATS, shorty, arg_array, temp_char, finished)
 1: // LOOP:
-    movb (REG_VAR(shorty, 0)), REG_VAR(temp_char, 2)  // temp_char := *shorty
-    addl MACRO_LITERAL(1), REG_VAR(shorty, 0)         // shorty++
-    cmpb MACRO_LITERAL(0), REG_VAR(temp_char, 2)      // if (temp_char == '\0')
-    je RAW_VAR(finished, 3)                           //   goto finished
-    cmpb MACRO_LITERAL(70), REG_VAR(temp_char, 2)     // if (temp_char == 'F')
-    je 3f                                             //   goto SKIP_FLOAT
-    cmpb MACRO_LITERAL(68), REG_VAR(temp_char, 2)     // if (temp_char == 'D')
-    je 4f                                             //   goto SKIP_DOUBLE
-    jmp 5f                                            // goto end
+    movb (REG_VAR(shorty)), REG_VAR(temp_char)     // temp_char := *shorty
+    addl MACRO_LITERAL(1), REG_VAR(shorty)         // shorty++
+    cmpb MACRO_LITERAL(0), REG_VAR(temp_char)      // if (temp_char == '\0')
+    je VAR(finished)                               //   goto finished
+    cmpb MACRO_LITERAL(70), REG_VAR(temp_char)     // if (temp_char == 'F')
+    je 3f                                          //   goto SKIP_FLOAT
+    cmpb MACRO_LITERAL(68), REG_VAR(temp_char)     // if (temp_char == 'D')
+    je 4f                                          //   goto SKIP_DOUBLE
+    jmp 5f                                         // goto end
 3:  // SKIP_FLOAT
-    addl MACRO_LITERAL(4), REG_VAR(arg_array, 1)      // arg_array++
-    jmp 1b                                            // goto LOOP
+    addl MACRO_LITERAL(4), REG_VAR(arg_array)      // arg_array++
+    jmp 1b                                         // goto LOOP
 4:  // SKIP_DOUBLE
-    addl MACRO_LITERAL(8), REG_VAR(arg_array, 1)      // arg_array+=2
-    jmp 1b                                            // goto LOOP
+    addl MACRO_LITERAL(8), REG_VAR(arg_array)      // arg_array+=2
+    jmp 1b                                         // goto LOOP
 5:
 END_MACRO
 
@@ -617,147 +619,148 @@
 END_FUNCTION art_quick_invoke_static_stub
 
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION RAW_VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
-    subl MACRO_LITERAL(12), %esp  // push padding
+    subl MACRO_LITERAL(12), %esp                // push padding
     CFI_ADJUST_CFA_OFFSET(12)
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET                // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    call VAR(cxx_name, 1)         // cxx_name(Thread*)
-    addl MACRO_LITERAL(16), %esp  // pop arguments
+    call CALLVAR(cxx_name)                      // cxx_name(Thread*)
+    addl MACRO_LITERAL(16), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION RAW_VAR(c_name, 0)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME         // restore frame up to return address
+    CALL_MACRO(return_macro)                    // return or deliver exception
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION RAW_VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
-    subl MACRO_LITERAL(8), %esp   // push padding
+    subl MACRO_LITERAL(8), %esp                  // push padding
     CFI_ADJUST_CFA_OFFSET(8)
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET                 // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)         // cxx_name(arg1, Thread*)
-    addl MACRO_LITERAL(16), %esp  // pop arguments
+    PUSH eax                                     // pass arg1
+    call CALLVAR(cxx_name)                       // cxx_name(arg1, Thread*)
+    addl MACRO_LITERAL(16), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION RAW_VAR(c_name, 0)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    CALL_MACRO(return_macro)                     // return or deliver exception
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION RAW_VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
-    PUSH eax                      // push padding
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    PUSH eax                                     // push padding
+    pushl %fs:THREAD_SELF_OFFSET                 // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH ecx                      // pass arg2
-    PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, Thread*)
-    addl MACRO_LITERAL(16), %esp  // pop arguments
+    PUSH ecx                                     // pass arg2
+    PUSH eax                                     // pass arg1
+    call CALLVAR(cxx_name)                       // cxx_name(arg1, arg2, Thread*)
+    addl MACRO_LITERAL(16), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION RAW_VAR(c_name, 0)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    CALL_MACRO(return_macro)                     // return or deliver exception
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION RAW_VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET                 // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH edx                      // pass arg3
-    PUSH ecx                      // pass arg2
-    PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, arg3, Thread*)
-    addl MACRO_LITERAL(16), %esp  // pop arguments
+    PUSH edx                                     // pass arg3
+    PUSH ecx                                     // pass arg2
+    PUSH eax                                     // pass arg1
+    call CALLVAR(cxx_name)                       // cxx_name(arg1, arg2, arg3, Thread*)
+    addl MACRO_LITERAL(16), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION RAW_VAR(c_name, 0)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    CALL_MACRO(return_macro)                     // return or deliver exception
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION RAW_VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
-    subl MACRO_LITERAL(12), %esp  // alignment padding
+    subl MACRO_LITERAL(12), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET                 // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH ebx                      // pass arg4
-    PUSH edx                      // pass arg3
-    PUSH ecx                      // pass arg2
-    PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, arg3, arg4, Thread*)
-    addl MACRO_LITERAL(32), %esp  // pop arguments
+    PUSH ebx                                     // pass arg4
+    PUSH edx                                     // pass arg3
+    PUSH ecx                                     // pass arg2
+    PUSH eax                                     // pass arg1
+    call CALLVAR(cxx_name)                       // cxx_name(arg1, arg2, arg3, arg4, Thread*)
+    addl MACRO_LITERAL(32), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION RAW_VAR(c_name, 0)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    CALL_MACRO(return_macro)                     // return or deliver exception
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION RAW_VAR(c_name, 0)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx // save ref containing registers for GC
+    DEFINE_FUNCTION VAR(c_name)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx       // save ref containing registers for GC
     // Outgoing argument set up
     mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ecx  // get referrer
-    PUSH eax                      // push padding
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    PUSH eax                                          // push padding
+    pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH ecx                      // pass referrer
-    PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)         // cxx_name(arg1, referrer, Thread*)
-    addl MACRO_LITERAL(16), %esp  // pop arguments
+    PUSH ecx                                          // pass referrer
+    PUSH eax                                          // pass arg1
+    call CALLVAR(cxx_name)                            // cxx_name(arg1, referrer, Thread*)
+    addl MACRO_LITERAL(16), %esp                      // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION RAW_VAR(c_name, 0)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME               // restore frame up to return address
+    CALL_MACRO(return_macro)                          // return or deliver exception
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION RAW_VAR(c_name, 0)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC
+    DEFINE_FUNCTION VAR(c_name)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx        // save ref containing registers for GC
     // Outgoing argument set up
     mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %edx  // get referrer
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH edx                      // pass referrer
-    PUSH ecx                      // pass arg2
-    PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, referrer, Thread*)
-    addl MACRO_LITERAL(16), %esp  // pop arguments
+    PUSH edx                                          // pass referrer
+    PUSH ecx                                          // pass arg2
+    PUSH eax                                          // pass arg1
+    call CALLVAR(cxx_name)                            // cxx_name(arg1, arg2, referrer, Thread*)
+    addl MACRO_LITERAL(16), %esp                      // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION RAW_VAR(c_name, 0)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME               // restore frame up to return address
+    CALL_MACRO(return_macro)                          // return or deliver exception
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION RAW_VAR(c_name, 0)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx  // save ref containing registers for GC
+    DEFINE_FUNCTION VAR(c_name)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx        // save ref containing registers for GC
     // Outgoing argument set up
     mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ebx  // get referrer
-    subl MACRO_LITERAL(12), %esp  // alignment padding
+    subl MACRO_LITERAL(12), %esp                      // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH ebx                      // pass referrer
-    PUSH edx                      // pass arg3
-    PUSH ecx                      // pass arg2
-    PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, arg3, referrer, Thread*)
-    addl LITERAL(32), %esp        // pop arguments
+    PUSH ebx                                          // pass referrer
+    PUSH edx                                          // pass arg3
+    PUSH ecx                                          // pass arg2
+    PUSH eax                                          // pass arg1
+    call CALLVAR(cxx_name)                            // cxx_name(arg1, arg2, arg3, referrer,
+                                                      //          Thread*)
+    addl LITERAL(32), %esp                            // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
-    CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION RAW_VAR(c_name, 0)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME               // restore frame up to return address
+    CALL_MACRO(return_macro)                          // return or deliver exception
+    END_FUNCTION VAR(c_name)
 END_MACRO
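The recurring "padding" instructions in these downcall macros all serve one invariant: the outgoing words pushed after the callee-save frame must keep %esp 16-byte aligned at the call instruction. A worked tally, assuming the 16-byte stack alignment ART maintains on x86:

    // NO_ARG:    12 (pad) + 4 (Thread*)             = 16  -> addl $16 to pop
    // ONE_ARG:    8 (pad) + 4 (Thread*) +  4 (arg)  = 16  -> addl $16
    // TWO_ARG:    4 (pad) + 4 (Thread*) +  8 (args) = 16  -> addl $16
    // THREE_ARG:            4 (Thread*) + 12 (args) = 16  -> addl $16
    // FOUR_ARG:  12 (pad) + 4 (Thread*) + 16 (args) = 32  -> addl $32

The REF variants push one extra word for the referrer, which is why THREE_ARG_REF_DOWNCALL pads by 12 and pops 32.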
 
 MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
@@ -778,9 +781,9 @@
 
 MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
     cmpl MACRO_LITERAL(0),%fs:THREAD_EXCEPTION_OFFSET // exception field == 0 ?
-    jne 1f                         // if exception field != 0 goto 1
-    ret                            // return
-1:                                 // deliver exception on current thread
+    jne 1f                                            // if exception field != 0 goto 1
+    ret                                               // return
+1:                                                    // deliver exception on current thread
     DELIVER_PENDING_EXCEPTION
 END_MACRO
 
@@ -1018,15 +1021,15 @@
 .Lslow_lock:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
-    subl LITERAL(8), %esp         // alignment padding
+    subl LITERAL(8), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET          // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH eax                      // pass object
-    call SYMBOL(artLockObjectFromCode)  // artLockObjectFromCode(object, Thread*)
-    addl LITERAL(16), %esp  // pop arguments
+    PUSH eax                              // pass object
+    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
+    addl LITERAL(16), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object
 
@@ -1073,54 +1076,54 @@
 .Lslow_unlock:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
-    subl LITERAL(8), %esp         // alignment padding
+    subl LITERAL(8), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET          // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH eax                      // pass object
+    PUSH eax                              // pass object
     call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
-    addl LITERAL(16), %esp  // pop arguments
+    addl LITERAL(16), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object
 
 DEFINE_FUNCTION art_quick_is_assignable
-    PUSH eax                     // alignment padding
-    PUSH ecx                     // pass arg2 - obj->klass
-    PUSH eax                     // pass arg1 - checked class
+    PUSH eax                              // alignment padding
+    PUSH ecx                              // pass arg2 - obj->klass
+    PUSH eax                              // pass arg1 - checked class
     call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
-    addl LITERAL(12), %esp        // pop arguments
+    addl LITERAL(12), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-12)
     ret
 END_FUNCTION art_quick_is_assignable
 
 DEFINE_FUNCTION art_quick_check_cast
-    PUSH eax                     // alignment padding
-    PUSH ecx                     // pass arg2 - obj->klass
-    PUSH eax                     // pass arg1 - checked class
+    PUSH eax                              // alignment padding
+    PUSH ecx                              // pass arg2 - obj->klass
+    PUSH eax                              // pass arg1 - checked class
     call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
     testl %eax, %eax
-    jz 1f                         // jump forward if not assignable
-    addl LITERAL(12), %esp        // pop arguments
+    jz 1f                                 // jump forward if not assignable
+    addl LITERAL(12), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-12)
     ret
 
-    CFI_ADJUST_CFA_OFFSET(12)     // Reset unwind info so following code unwinds.
+    CFI_ADJUST_CFA_OFFSET(12)             // Reset unwind info so following code unwinds.
 1:
-    POP eax                       // pop arguments
+    POP eax                               // pop arguments
     POP ecx
     addl LITERAL(4), %esp
     CFI_ADJUST_CFA_OFFSET(-4)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  ebx, ebx  // save all registers as basis for long jump context
     // Outgoing argument set up
-    PUSH eax                      // alignment padding
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    PUSH eax                              // alignment padding
+    pushl %fs:THREAD_SELF_OFFSET          // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH ecx                      // pass arg2
-    PUSH eax                      // pass arg1
+    PUSH ecx                              // pass arg2
+    PUSH eax                              // pass arg1
     call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
-    int3                          // unreached
+    UNREACHABLE
 END_FUNCTION art_quick_check_cast
 
     /*
@@ -1172,10 +1175,10 @@
     movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)
     ret
 .Lcheck_assignability:
-    PUSH eax                     // save arguments
+    PUSH eax                      // save arguments
     PUSH ecx
     PUSH edx
-    subl LITERAL(8), %esp        // alignment padding
+    subl LITERAL(8), %esp         // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
 #ifdef USE_HEAP_POISONING
     movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %eax  // pass arg2 - type of the value to be stored
@@ -1213,7 +1216,7 @@
     PUSH edx                      // pass arg2 - value
     PUSH eax                      // pass arg1 - array
     call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
-    int3                          // unreached
+    UNREACHABLE
 END_FUNCTION art_quick_aput_obj
 
 DEFINE_FUNCTION art_quick_memcpy
@@ -1250,37 +1253,37 @@
 END_FUNCTION art_quick_f2l
 
 DEFINE_FUNCTION art_quick_ldiv
-    subl LITERAL(12), %esp       // alignment padding
+    subl LITERAL(12), %esp        // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
-    PUSH ebx                     // pass arg4 b.hi
-    PUSH edx                     // pass arg3 b.lo
-    PUSH ecx                     // pass arg2 a.hi
-    PUSH eax                     // pass arg1 a.lo
-    call SYMBOL(artLdiv)     // (jlong a, jlong b)
-    addl LITERAL(28), %esp       // pop arguments
+    PUSH ebx                      // pass arg4 b.hi
+    PUSH edx                      // pass arg3 b.lo
+    PUSH ecx                      // pass arg2 a.hi
+    PUSH eax                      // pass arg1 a.lo
+    call SYMBOL(artLdiv)          // (jlong a, jlong b)
+    addl LITERAL(28), %esp        // pop arguments
     CFI_ADJUST_CFA_OFFSET(-28)
     ret
 END_FUNCTION art_quick_ldiv
 
 DEFINE_FUNCTION art_quick_lmod
-    subl LITERAL(12), %esp       // alignment padding
+    subl LITERAL(12), %esp        // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
-    PUSH ebx                     // pass arg4 b.hi
-    PUSH edx                     // pass arg3 b.lo
-    PUSH ecx                     // pass arg2 a.hi
-    PUSH eax                     // pass arg1 a.lo
-    call SYMBOL(artLmod)     // (jlong a, jlong b)
-    addl LITERAL(28), %esp       // pop arguments
+    PUSH ebx                      // pass arg4 b.hi
+    PUSH edx                      // pass arg3 b.lo
+    PUSH ecx                      // pass arg2 a.hi
+    PUSH eax                      // pass arg1 a.lo
+    call SYMBOL(artLmod)          // (jlong a, jlong b)
+    addl LITERAL(28), %esp        // pop arguments
     CFI_ADJUST_CFA_OFFSET(-28)
     ret
 END_FUNCTION art_quick_lmod
 
 DEFINE_FUNCTION art_quick_lmul
-    imul %eax, %ebx              // ebx = a.lo(eax) * b.hi(ebx)
-    imul %edx, %ecx              // ecx = b.lo(edx) * a.hi(ecx)
-    mul  %edx                    // edx:eax = a.lo(eax) * b.lo(edx)
+    imul %eax, %ebx               // ebx = a.lo(eax) * b.hi(ebx)
+    imul %edx, %ecx               // ecx = b.lo(edx) * a.hi(ecx)
+    mul  %edx                     // edx:eax = a.lo(eax) * b.lo(edx)
     add  %ebx, %ecx
-    add  %ecx, %edx              // edx += (a.lo * b.hi) + (b.lo * a.hi)
+    add  %ecx, %edx               // edx += (a.lo * b.hi) + (b.lo * a.hi)
     ret
 END_FUNCTION art_quick_lmul
 
@@ -1415,7 +1418,7 @@
      */
 DEFINE_FUNCTION art_quick_imt_conflict_trampoline
     movd %xmm7, %eax              // get target method index stored in xmm7
-    jmp SYMBOL(art_quick_invoke_interface_trampoline)
+    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END_FUNCTION art_quick_imt_conflict_trampoline
 
 DEFINE_FUNCTION art_quick_resolution_trampoline
@@ -1440,7 +1443,7 @@
 
 DEFINE_FUNCTION art_quick_generic_jni_trampoline
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX
-    movl %esp, %ebp                 // save SP at callee-save frame
+    movl %esp, %ebp               // save SP at callee-save frame
     CFI_DEF_CFA_REGISTER(ebp)
     subl LITERAL(5120), %esp
     // prepare for artQuickGenericJniTrampoline call
@@ -1475,7 +1478,7 @@
     //  (esp)    4(esp)  12(esp)    <= C calling convention
     //  fs:...  eax:edx   fp0      <= where they are
 
-    subl LITERAL(20), %esp         // Padding & pass float result.
+    subl LITERAL(20), %esp        // Padding & pass float result.
     fstpl (%esp)
     pushl %edx                    // Pass int result.
     pushl %eax
@@ -1498,7 +1501,7 @@
     CFI_ADJUST_CFA_OFFSET(-(4 + 4 * 8))
 
     POP ecx
-    addl LITERAL(4), %esp     // Avoid edx, as it may be part of the result.
+    addl LITERAL(4), %esp         // Avoid edx, as it may be part of the result.
     CFI_ADJUST_CFA_OFFSET(-4)
     POP ebx
     POP ebp  // Restore callee saves
@@ -1537,7 +1540,7 @@
     addl LITERAL(48), %esp        // Remove FPRs and EAX, ECX, EDX, EBX.
     CFI_ADJUST_CFA_OFFSET(-48)
 
-    POP ebp  // Restore callee saves
+    POP ebp                       // Restore callee saves
     POP esi
     POP edi
 
@@ -1634,7 +1637,7 @@
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
     CFI_ADJUST_CFA_OFFSET(4)
     call SYMBOL(artDeoptimize)    // artDeoptimize(Thread*)
-    int3                          // Unreachable.
+    UNREACHABLE
 END_FUNCTION art_quick_deoptimize
 
     /*
@@ -1645,8 +1648,8 @@
      *    ecx:   comp string object (known non-null)
      */
 DEFINE_FUNCTION art_quick_string_compareto
-    PUSH esi                    // push callee save reg
-    PUSH edi                    // push callee save reg
+    PUSH esi                      // push callee save reg
+    PUSH edi                      // push callee save reg
     mov MIRROR_STRING_COUNT_OFFSET(%eax), %edx
     mov MIRROR_STRING_COUNT_OFFSET(%ecx), %ebx
     lea MIRROR_STRING_VALUE_OFFSET(%eax), %esi
@@ -1690,7 +1693,7 @@
     PUSH ecx                        // second arg to longjmp (1)
     PUSH eax                        // first arg to longjmp (jmp_buf)
     call PLT_SYMBOL(longjmp)
-    int3                            // won't get here.
+    UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
     // TODO: implement these!
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index b2b6c2d..706ae58 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -19,57 +19,49 @@
 
 #include "asm_support_x86_64.h"
 
-#if defined(__APPLE__) || (defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5))
-    // Clang's as(1) doesn't let you name macro parameters prior to 3.5.
-    #define MACRO0(macro_name) .macro macro_name
-    #define MACRO1(macro_name, macro_arg1) .macro macro_name
-    #define MACRO2(macro_name, macro_arg1, macro_args2) .macro macro_name
-    #define MACRO3(macro_name, macro_arg1, macro_args2, macro_args3) .macro macro_name
-    #define END_MACRO .endmacro
+// Regular gas(1) & current clang/llvm assembler support named macro parameters.
+#define MACRO0(macro_name) .macro macro_name
+#define MACRO1(macro_name, macro_arg1) .macro macro_name macro_arg1
+#define MACRO2(macro_name, macro_arg1, macro_arg2) .macro macro_name macro_arg1, macro_arg2
+#define MACRO3(macro_name, macro_arg1, macro_arg2, macro_arg3) .macro macro_name macro_arg1, macro_arg2, macro_arg3
+#define END_MACRO .endm
 
-    // Clang's as(1) uses $0, $1, and so on for macro arguments.
-    #define RAW_VAR(name,index) $index
-    #define VAR(name,index) SYMBOL($index)
-    #define PLT_VAR(name, index) PLT_SYMBOL($index)
-    #define REG_VAR(name,index) %$index
-    #define CALL_MACRO(name,index) $index
-
-    //  The use of $x for arguments mean that literals need to be represented with $$x in macros.
-    #define LITERAL(value) $value
-    #define MACRO_LITERAL(value) $$value
+#if defined(__clang__)
+    // Clang/llvm does not support .altmacro. However, the clang/llvm preprocessor doesn't
+    // separate the backslash and parameter by a space. Everything just works.
+    #define RAW_VAR(name) \name
+    #define VAR(name) SYMBOL(\name)
+    #define PLT_VAR(name) \name@PLT
+    #define REG_VAR(name) %\name
+    #define CALL_MACRO(name) \name
 #else
-    // Regular gas(1) lets you name macro parameters.
-    #define MACRO0(macro_name) .macro macro_name
-    #define MACRO1(macro_name, macro_arg1) .macro macro_name macro_arg1
-    #define MACRO2(macro_name, macro_arg1, macro_arg2) .macro macro_name macro_arg1, macro_arg2
-    #define MACRO3(macro_name, macro_arg1, macro_arg2, macro_arg3) .macro macro_name macro_arg1, macro_arg2, macro_arg3
-    #define END_MACRO .endm
-
     // Regular gas(1) uses \argument_name for macro arguments.
     // We need to turn on alternate macro syntax so we can use & instead, or the preprocessor
     // will screw us by inserting a space between the \ and the name. Even in this mode there's
     // no special meaning to $, so literals are still just $x. The use of altmacro means % is a
-    // special character meaning care needs to be taken when passing registers as macro arguments.
+    // special character, meaning care needs to be taken when passing registers as macro
+    // arguments.
     .altmacro
-    #define RAW_VAR(name,index) name&
-    #define VAR(name,index) name&
-    #define PLT_VAR(name, index) name&@PLT
-    #define REG_VAR(name,index) %name
-    #define CALL_MACRO(name,index) name&
+    #define RAW_VAR(name) name&
+    #define VAR(name) name&
+    #define PLT_VAR(name) name&@PLT
+    #define REG_VAR(name) %name
+    #define CALL_MACRO(name) name&
+#endif
 
-    #define LITERAL(value) $value
+#define LITERAL(value) $value
+#if defined(__APPLE__)
+    #define MACRO_LITERAL(value) $$(value)
+#else
     #define MACRO_LITERAL(value) $value
 #endif
 
 #if defined(__APPLE__)
-    #define FUNCTION_TYPE(name,index)
-    #define SIZE(name,index)
-#elif defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)
-    #define FUNCTION_TYPE(name,index) .type $index, @function
-    #define SIZE(name,index) .size $index, .-$index
+    #define FUNCTION_TYPE(name)
+    #define SIZE(name)
 #else
-    #define FUNCTION_TYPE(name,index) .type name&, @function
-    #define SIZE(name,index) .size name, .-name
+    #define FUNCTION_TYPE(name) .type name, @function
+    #define SIZE(name) .size name, .-name
 #endif
 
     // CFI support.
@@ -95,13 +87,7 @@
     // Symbols.
 #if !defined(__APPLE__)
     #define SYMBOL(name) name
-    #if defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)
-        // TODO: Disabled for old clang 3.3, this leads to text relocations and there should be a
-        // better fix.
-        #define PLT_SYMBOL(name) name // ## @PLT
-    #else
-        #define PLT_SYMBOL(name) name ## @PLT
-    #endif
+    #define PLT_SYMBOL(name) name ## @PLT
 #else
     #define SYMBOL(name) _ ## name
     #define PLT_SYMBOL(name) _ ## name
@@ -122,11 +108,11 @@
 // TODO: we might need to use SYMBOL() here to add the underscore prefix
 // for mac builds.
 MACRO1(DEFINE_FUNCTION, c_name)
-    FUNCTION_TYPE(\c_name, 0)
-    ASM_HIDDEN VAR(c_name, 0)
-    .globl VAR(c_name, 0)
+    FUNCTION_TYPE(SYMBOL(\c_name))
+    ASM_HIDDEN SYMBOL(\c_name)
+    .globl VAR(c_name)
     ALIGN_FUNCTION_ENTRY
-VAR(c_name, 0):
+VAR(c_name):
     CFI_STARTPROC
     // Ensure we get a sane starting CFA.
     CFI_DEF_CFA(rsp, 8)
@@ -134,32 +120,32 @@
 
 MACRO1(END_FUNCTION, c_name)
     CFI_ENDPROC
-    SIZE(\c_name, 0)
+    SIZE(SYMBOL(\c_name))
 END_MACRO
 
 MACRO1(PUSH, reg)
-    pushq REG_VAR(reg, 0)
+    pushq REG_VAR(reg)
     CFI_ADJUST_CFA_OFFSET(8)
-    CFI_REL_OFFSET(REG_VAR(reg, 0), 0)
+    CFI_REL_OFFSET(REG_VAR(reg), 0)
 END_MACRO
 
 MACRO1(POP, reg)
-    popq REG_VAR(reg,0)
+    popq REG_VAR(reg)
     CFI_ADJUST_CFA_OFFSET(-8)
-    CFI_RESTORE(REG_VAR(reg,0))
+    CFI_RESTORE(REG_VAR(reg))
 END_MACRO
 
 MACRO1(UNIMPLEMENTED,name)
-    FUNCTION_TYPE(\name, 0)
-    ASM_HIDDEN VAR(c_name, 0)
-    .globl VAR(name, 0)
+    FUNCTION_TYPE(SYMBOL(\name))
+    ASM_HIDDEN VAR(name)
+    .globl VAR(name)
     ALIGN_FUNCTION_ENTRY
-VAR(name, 0):
+VAR(name):
     CFI_STARTPROC
     int3
     int3
     CFI_ENDPROC
-    SIZE(\name, 0)
+    SIZE(SYMBOL(\name))
 END_MACRO
 
 MACRO0(UNREACHABLE)
@@ -173,14 +159,14 @@
 // Macros to poison (negate) the reference for heap poisoning.
 MACRO1(POISON_HEAP_REF, rRef)
 #ifdef USE_HEAP_POISONING
-    negl REG_VAR(rRef, 0)
+    negl REG_VAR(rRef)
 #endif  // USE_HEAP_POISONING
 END_MACRO
 
 // Macros to unpoison (negate) the reference for heap poisoning.
 MACRO1(UNPOISON_HEAP_REF, rRef)
 #ifdef USE_HEAP_POISONING
-    negl REG_VAR(rRef, 0)
+    negl REG_VAR(rRef)
 #endif  // USE_HEAP_POISONING
 END_MACRO
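With the pre-3.5 clang workarounds gone, this file keys everything off named macro parameters; what remains assembler-specific is only how a parameter is referenced (clang's integrated assembler pastes \name directly, while gas needs .altmacro plus a trailing & to glue the substitution to adjacent text) and the doubled $$ Apple requires for literals inside macros. A standalone sketch of the two reference styles, assuming a .S file run through the C preprocessor; CALL_HELPER and artFoo are made-up names:

#if defined(__clang__)
    #define VAR(name) \name
#else
    .altmacro
    #define VAR(name) name&
#endif
.macro CALL_HELPER cxx_name
    call VAR(cxx_name)                // expands to: call artFoo
.endm
    .text
    CALL_HELPER artFoo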
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 7d86c3a..7e7d789 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -275,33 +275,33 @@
 END_MACRO
 
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
-    DEFINE_FUNCTION VAR(c_name, 0)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    DEFINE_FUNCTION VAR(c_name)
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
-    call VAR(cxx_name, 1)     // cxx_name(Thread*)
+    call VAR(cxx_name)                 // cxx_name(Thread*)
     UNREACHABLE
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
-    DEFINE_FUNCTION VAR(c_name, 0)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    DEFINE_FUNCTION VAR(c_name)
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
-    call VAR(cxx_name, 1)     // cxx_name(arg1, Thread*)
+    call VAR(cxx_name)                 // cxx_name(arg1, Thread*)
     UNREACHABLE
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
-    DEFINE_FUNCTION VAR(c_name, 0)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    DEFINE_FUNCTION VAR(c_name)
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    call VAR(cxx_name, 1)     // cxx_name(Thread*)
+    call VAR(cxx_name)                 // cxx_name(arg1, arg2, Thread*)
     UNREACHABLE
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
     /*
@@ -353,8 +353,7 @@
      *
      * Adapted from x86 code.
      */
-MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
-    DEFINE_FUNCTION VAR(c_name, 0)
+MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
     // Helper signature is always
     // (method_idx, *this_object, *self, sp)
@@ -362,7 +361,7 @@
     movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread
     movq %rsp, %rcx                                        // pass SP
 
-    call VAR(cxx_name, 1)                   // cxx_name(arg1, arg2, Thread*, SP)
+    call VAR(cxx_name)                                     // cxx_name(arg1, arg2, Thread*, SP)
                                                            // save the code pointer
     movq %rax, %rdi
     movq %rdx, %rax
@@ -375,10 +374,13 @@
     jmp *%rax
 1:
     DELIVER_PENDING_EXCEPTION
-    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
+    DEFINE_FUNCTION VAR(c_name)
+    INVOKE_TRAMPOLINE_BODY RAW_VAR(cxx_name)
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
-INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
 INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
 
 INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
@@ -397,7 +399,7 @@
     movb (%r10), %al              // al := *shorty
     addq MACRO_LITERAL(1), %r10   // shorty++
     cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
-    je VAR(finished, 1)
+    je VAR(finished)
     cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
     je 2f
     cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
@@ -409,11 +411,11 @@
     addq MACRO_LITERAL(4), %r11   // arg_array++
     jmp 1b                        // goto LOOP
 2:  // FOUND_DOUBLE
-    movsd (%r11), REG_VAR(xmm_reg, 0)
+    movsd (%r11), REG_VAR(xmm_reg)
     addq MACRO_LITERAL(8), %r11   // arg_array+=2
     jmp 4f
 3:  // FOUND_FLOAT
-    movss (%r11), REG_VAR(xmm_reg, 0)
+    movss (%r11), REG_VAR(xmm_reg)
     addq MACRO_LITERAL(4), %r11   // arg_array++
 4:
 END_MACRO
@@ -428,18 +430,18 @@
     movb (%r10), %al              // al := *shorty
     addq MACRO_LITERAL(1), %r10   // shorty++
     cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
-    je  VAR(finished, 2)
+    je  VAR(finished)
     cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
     je 2f
     cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
     je 3f
     cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
     je 4f
-    movl (%r11), REG_VAR(gpr_reg32, 1)
+    movl (%r11), REG_VAR(gpr_reg32)
     addq MACRO_LITERAL(4), %r11   // arg_array++
     jmp 5f
 2:  // FOUND_LONG
-    movq (%r11), REG_VAR(gpr_reg64, 0)
+    movq (%r11), REG_VAR(gpr_reg64)
     addq MACRO_LITERAL(8), %r11   // arg_array+=2
     jmp 5f
 3:  // SKIP_FLOAT
@@ -691,94 +693,94 @@
 END_FUNCTION art_quick_do_long_jump
 
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rdi    // pass Thread::Current()
-    call VAR(cxx_name, 1)                // cxx_name(Thread*)
+    call VAR(cxx_name)                   // cxx_name(Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro, 2)          // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
+    CALL_MACRO(return_macro)             // return or deliver exception
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
-    call VAR(cxx_name, 1)                // cxx_name(arg0, Thread*)
+    call VAR(cxx_name)                   // cxx_name(arg0, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro, 2)          // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
+    CALL_MACRO(return_macro)             // return or deliver exception
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
-    call VAR(cxx_name, 1)                // cxx_name(arg0, arg1, Thread*)
+    call VAR(cxx_name)                   // cxx_name(arg0, arg1, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro, 2)          // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
+    CALL_MACRO(return_macro)             // return or deliver exception
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
-    call VAR(cxx_name, 1)               // cxx_name(arg0, arg1, arg2, Thread*)
+    call VAR(cxx_name)                  // cxx_name(arg0, arg1, arg2, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    CALL_MACRO(return_macro, 2)         // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
+    CALL_MACRO(return_macro)            // return or deliver exception
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
-    call VAR(cxx_name, 1)               // cxx_name(arg1, arg2, arg3, arg4, Thread*)
+    call VAR(cxx_name)                  // cxx_name(arg1, arg2, arg3, arg4, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    CALL_MACRO(return_macro, 2)         // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
+    CALL_MACRO(return_macro)            // return or deliver exception
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     movq 8(%rsp), %rsi                  // pass referrer
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
                                         // arg0 is in rdi
     movq %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
-    call VAR(cxx_name, 1)               // cxx_name(arg0, referrer, Thread*)
+    call VAR(cxx_name)                  // cxx_name(arg0, referrer, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    CALL_MACRO(return_macro, 2)
-    END_FUNCTION VAR(c_name, 0)
+    CALL_MACRO(return_macro)
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     movq 8(%rsp), %rdx                  // pass referrer
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
                                         // arg0 and arg1 are in rdi/rsi
     movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
-    call VAR(cxx_name, 1)               // (arg0, arg1, referrer, Thread*)
+    call VAR(cxx_name)                  // (arg0, arg1, referrer, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    CALL_MACRO(return_macro, 2)
-    END_FUNCTION VAR(c_name, 0)
+    CALL_MACRO(return_macro)
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION VAR(c_name)
     movq 8(%rsp), %rcx                  // pass referrer
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
                                         // arg0, arg1, and arg2 are in rdi/rsi/rdx
     movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
-    call VAR(cxx_name, 1)               // cxx_name(arg0, arg1, arg2, referrer, Thread*)
+    call VAR(cxx_name)                  // cxx_name(arg0, arg1, arg2, referrer, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    CALL_MACRO(return_macro, 2)         // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
+    CALL_MACRO(return_macro)            // return or deliver exception
+    END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
@@ -1143,7 +1145,7 @@
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
     call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
-    int3                              // unreached
+    UNREACHABLE
 END_FUNCTION art_quick_check_cast
 
 
@@ -1273,7 +1275,7 @@
     movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass arg 3 = Thread::Current().
                                             // Pass arg 1 = array.
     call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
-    int3                          // unreached
+    UNREACHABLE
 END_FUNCTION art_quick_aput_obj
 
 // TODO: This is quite silly on X86_64 now.
@@ -1352,7 +1354,7 @@
     int3
 #else
     movq %rax, %rdi
-    jmp art_quick_invoke_interface_trampoline
+    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 #endif  // __APPLE__
 END_FUNCTION art_quick_imt_conflict_trampoline
 
@@ -1670,7 +1672,7 @@
                                    // Stack should be aligned now.
     movq %gs:THREAD_SELF_OFFSET, %rdi         // Pass Thread.
     call SYMBOL(artDeoptimize) // artDeoptimize(Thread*)
-    int3                           // Unreachable.
+    UNREACHABLE
 END_FUNCTION art_quick_deoptimize
 
     /*
@@ -1729,5 +1731,5 @@
                                     // first arg to longjmp is already in correct register
     movq LITERAL(1), %rsi           // second arg to longjmp (1)
     call PLT_SYMBOL(longjmp)
-    int3                            // won't get here
+    UNREACHABLE
 END_FUNCTION art_nested_signal_return
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 8712bdb..bb3c72c 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -20,6 +20,7 @@
 #include "art_method.h"
 
 #include "art_field.h"
+#include "base/logging.h"
 #include "dex_file.h"
 #include "dex_file-inl.h"
 #include "gc_root-inl.h"
@@ -317,7 +318,9 @@
 
 inline QuickMethodFrameInfo ArtMethod::GetQuickFrameInfo(const void* code_pointer) {
   DCHECK(code_pointer != nullptr);
-  DCHECK_EQ(code_pointer, GetQuickOatCodePointer(sizeof(void*)));
+  if (kIsDebugBuild && !IsProxyMethod()) {
+    CHECK_EQ(code_pointer, GetQuickOatCodePointer(sizeof(void*)));
+  }
   return reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].frame_info_;
 }
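The relaxed check above skips proxy methods, whose entry points come from shared proxy-invoke code rather than per-method compiled code, so the equality with GetQuickOatCodePointer() cannot hold for them; gating on kIsDebugBuild keeps the lookup out of release builds while the compiler still type-checks the code everywhere. A minimal standalone C++ model of the idiom; apart from kIsDebugBuild's semantics, the names are hypothetical:

#include <cassert>

// Stand-in for art::kIsDebugBuild (base/logging.h): a constexpr flag rather
// than an #ifdef, so the guarded code is always compiled, merely dead in
// release builds.
#ifdef NDEBUG
constexpr bool kIsDebugBuild = false;
#else
constexpr bool kIsDebugBuild = true;
#endif

void VerifyCodePointer(const void* code_pointer, const void* oat_code,
                       bool is_proxy) {
  if (kIsDebugBuild && !is_proxy) {
    assert(code_pointer == oat_code);  // invariant holds only for non-proxies
  }
}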
 
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index c78a851..7673418 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -35,6 +35,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
+#include "oat_file-inl.h"
 #include "scoped_thread_state_change.h"
 #include "well_known_classes.h"
 
@@ -561,4 +562,14 @@
   return true;
 }
 
+const uint8_t* ArtMethod::GetQuickenedInfo() {
+  bool found = false;
+  OatFile::OatMethod oat_method =
+      Runtime::Current()->GetClassLinker()->FindOatMethodFor(this, &found);
+  if (!found || (oat_method.GetQuickCode() != nullptr)) {
+    return nullptr;
+  }
+  return oat_method.GetVmapTable();
+}
+
 }  // namespace art
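GetQuickenedInfo() encodes an oat-file convention: a dex-to-dex quickened method has no compiled quick code, and its vmap-table slot is reused to hold the quickening data, so the accessor must return nullptr whenever real quick code (and hence a real vmap table) is present. A toy standalone model of that decision; OatMethodModel and its fields are illustrative, not ART's types:

#include <cstdint>

struct OatMethodModel {
  const void* quick_code;     // compiled entry point, or nullptr
  const uint8_t* vmap_table;  // reused for quickened info when no quick code
};

const uint8_t* GetQuickenedInfoModel(bool found, const OatMethodModel& m) {
  if (!found || m.quick_code != nullptr) {
    return nullptr;  // compiled methods store a genuine vmap table here
  }
  return m.vmap_table;
}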
diff --git a/runtime/art_method.h b/runtime/art_method.h
index e8c47d9..4169c5e 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -336,6 +336,8 @@
   const uint8_t* GetVmapTable(const void* code_pointer, size_t pointer_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  const uint8_t* GetQuickenedInfo() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   CodeInfo GetOptimizedCodeInfo() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Callers should wrap the uint8_t* in a GcMap instance for convenient access.
diff --git a/runtime/base/allocator.h b/runtime/base/allocator.h
index 07daa7e..3422625 100644
--- a/runtime/base/allocator.h
+++ b/runtime/base/allocator.h
@@ -50,6 +50,7 @@
   kAllocatorTagMonitorList,
   kAllocatorTagClassTable,
   kAllocatorTagInternTable,
+  kAllocatorTagLambdaBoxTable,
   kAllocatorTagMaps,
   kAllocatorTagLOS,
   kAllocatorTagSafeMap,
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index f2c8355..709d9ae 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -231,19 +231,33 @@
     return ret;
   }
 
+  // Lower case for c++11 for each. const version.
+  ConstIterator begin() const {
+    ConstIterator ret(this, 0);
+    if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
+      ++ret;  // Skip all the empty slots.
+    }
+    return ret;
+  }
+
   // Lower case for c++11 for each.
   Iterator end() {
     return Iterator(this, NumBuckets());
   }
 
+  // Lower case for c++11 for each. const version.
+  ConstIterator end() const {
+    return ConstIterator(this, NumBuckets());
+  }
+
   bool Empty() {
     return Size() == 0;
   }
 
   // Erase algorithm:
   // Make an empty slot where the iterator is pointing.
-  // Scan fowards until we hit another empty slot.
-  // If an element inbetween doesn't rehash to the range from the current empty slot to the
+  // Scan forwards until we hit another empty slot.
+  // If an element in between doesn't rehash to the range from the current empty slot to the
   // iterator, it must be before the empty slot; in that case we can move it to the empty slot
   // and set the empty slot to be the location we just moved from.
   // Relies on maintaining the invariant that there are no empty slots from the 'ideal' index of an
@@ -299,23 +313,23 @@
   // Example: a set of Class* sorted by name, where we want to find a class by its name without
   // allocating a dummy object in the heap, for performance.
   template <typename K>
-  Iterator Find(const K& element) {
-    return FindWithHash(element, hashfn_(element));
+  Iterator Find(const K& key) {
+    return FindWithHash(key, hashfn_(key));
   }
 
   template <typename K>
-  ConstIterator Find(const K& element) const {
-    return FindWithHash(element, hashfn_(element));
+  ConstIterator Find(const K& key) const {
+    return FindWithHash(key, hashfn_(key));
   }
 
   template <typename K>
-  Iterator FindWithHash(const K& element, size_t hash) {
-    return Iterator(this, FindIndex(element, hash));
+  Iterator FindWithHash(const K& key, size_t hash) {
+    return Iterator(this, FindIndex(key, hash));
   }
 
   template <typename K>
-  ConstIterator FindWithHash(const K& element, size_t hash) const {
-    return ConstIterator(this, FindIndex(element, hash));
+  ConstIterator FindWithHash(const K& key, size_t hash) const {
+    return ConstIterator(this, FindIndex(key, hash));
   }
 
   // Insert an element, allows duplicates.
@@ -399,6 +413,10 @@
   }
 
   size_t IndexForHash(size_t hash) const {
+    // Protect against undefined behavior (division by zero).
+    if (UNLIKELY(num_buckets_ == 0)) {
+      return 0;
+    }
     return hash % num_buckets_;
   }
 
@@ -414,6 +432,10 @@
   // This value for not found is important so that Iterator(this, FindIndex(...)) == end().
   template <typename K>
   size_t FindIndex(const K& element, size_t hash) const {
+    // Guard against looking up an element when the table is empty (there is no valid index).
+    if (UNLIKELY(NumBuckets() == 0)) {
+      return 0;
+    }
     DCHECK_EQ(hashfn_(element), hash);
     size_t index = IndexForHash(hash);
     while (true) {
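
A minimal standalone sketch of the two hash_set.h fixes above, using an invented TinyTable rather than ART's HashSet: guarding the modulo avoids undefined behavior (division by zero) when the table has zero buckets, and const overloads of begin()/end() let range-based for iterate a const container.

#include <cassert>
#include <cstddef>
#include <vector>

class TinyTable {
 public:
  explicit TinyTable(size_t buckets) : slots_(buckets, 0) {}

  size_t IndexForHash(size_t hash) const {
    // Without this guard, hash % 0 is undefined behavior on an empty table.
    if (slots_.empty()) {
      return 0;
    }
    return hash % slots_.size();
  }

  // Const overloads make `for (int v : table)` compile when `table` is const.
  std::vector<int>::const_iterator begin() const { return slots_.begin(); }
  std::vector<int>::const_iterator end() const { return slots_.end(); }

 private:
  std::vector<int> slots_;
};

int main() {
  const TinyTable empty(0);
  assert(empty.IndexForHash(12345) == 0);  // No division by zero.
  for (int v : empty) { (void)v; }         // Compiles thanks to const begin()/end().
  return 0;
}
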
diff --git a/runtime/base/hash_set_test.cc b/runtime/base/hash_set_test.cc
index fd9eb45..4ef1f9e 100644
--- a/runtime/base/hash_set_test.cc
+++ b/runtime/base/hash_set_test.cc
@@ -186,6 +186,12 @@
   // Shrink again, the load factor should be good again.
   hash_set.ShrinkToMaximumLoad();
   EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor());
+
+  // Make sure all the initial elements we had are still there
+  for (const std::string& initial_string : strings) {
+    EXPECT_NE(hash_set.end(), hash_set.Find(initial_string))
+        << "expected to find " << initial_string;
+  }
 }
 
 TEST_F(HashSetTest, TestStress) {
diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h
index aba3762..03980e3 100644
--- a/runtime/base/histogram-inl.h
+++ b/runtime/base/histogram-inl.h
@@ -66,7 +66,7 @@
   while (max_ < new_max) {
     // If we have reached the maximum number of buckets, merge buckets together.
     if (frequency_.size() >= max_buckets_) {
-      CHECK(IsAligned<2>(frequency_.size()));
+      CHECK_ALIGNED(frequency_.size(), 2);
       // We double the width of each bucket to reduce the number of buckets by a factor of 2.
       bucket_width_ *= 2;
       const size_t limit = frequency_.size() / 2;
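
The IsAligned-to-CHECK_ALIGNED conversions in this and the surrounding files trade a compile-time template argument for a runtime value, which also lets the failing values appear in the abort message. A simplified approximation of such a macro (not ART's actual definition):

#include <cstdint>
#include <cstdio>
#include <cstdlib>

#define CHECK_ALIGNED(value, alignment)                                \
  do {                                                                 \
    uint64_t v_ = static_cast<uint64_t>(value);                        \
    uint64_t a_ = static_cast<uint64_t>(alignment);                    \
    if (v_ % a_ != 0) {                                                \
      std::fprintf(stderr, "Check failed: %llu is not %llu-aligned\n", \
                   (unsigned long long)v_, (unsigned long long)a_);    \
      std::abort();                                                    \
    }                                                                  \
  } while (0)

int main() {
  size_t frequency_size = 8;
  CHECK_ALIGNED(frequency_size, 2);  // Passes: 8 is a multiple of 2.
  return 0;
}
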
diff --git a/runtime/base/memory_tool.h b/runtime/base/memory_tool.h
index 31162a3..e0bdcfe 100644
--- a/runtime/base/memory_tool.h
+++ b/runtime/base/memory_tool.h
@@ -27,9 +27,17 @@
 
 #include <sanitizer/asan_interface.h>
 #define ADDRESS_SANITIZER
+
+#ifdef ART_ENABLE_ADDRESS_SANITIZER
 #define MEMORY_TOOL_MAKE_NOACCESS(p, s) __asan_poison_memory_region(p, s)
 #define MEMORY_TOOL_MAKE_UNDEFINED(p, s) __asan_unpoison_memory_region(p, s)
 #define MEMORY_TOOL_MAKE_DEFINED(p, s) __asan_unpoison_memory_region(p, s)
+#else
+#define MEMORY_TOOL_MAKE_NOACCESS(p, s) do { (void)(p); (void)(s); } while (0)
+#define MEMORY_TOOL_MAKE_UNDEFINED(p, s) do { (void)(p); (void)(s); } while (0)
+#define MEMORY_TOOL_MAKE_DEFINED(p, s) do { (void)(p); (void)(s); } while (0)
+#endif
+
 #define ATTRIBUTE_NO_SANITIZE_ADDRESS __attribute__((no_sanitize_address))
 #define RUNNING_ON_MEMORY_TOOL 1U
 constexpr bool kMemoryToolIsValgrind = false;
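
The fallback branch added above keeps the poison macros compilable when ASan support is disabled: each expands to a statement that consumes its arguments. The (void) casts silence unused-variable warnings, and the do { } while (0) wrapper keeps the macro safe as the sole statement of an unbraced if, as this standalone sketch shows:

#include <cstddef>

#define MEMORY_TOOL_MAKE_NOACCESS(p, s) do { (void)(p); (void)(s); } while (0)

void ReleaseRegion(void* ptr, size_t size, bool poison) {
  if (poison)
    MEMORY_TOOL_MAKE_NOACCESS(ptr, size);  // Expands safely under an unbraced if.
}

int main() {
  char buf[16];
  ReleaseRegion(buf, sizeof(buf), true);
  return 0;
}
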
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index e48d170..c591a51 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -61,6 +61,7 @@
 Mutex* Locks::thread_suspend_count_lock_ = nullptr;
 Mutex* Locks::trace_lock_ = nullptr;
 Mutex* Locks::unexpected_signal_lock_ = nullptr;
+Mutex* Locks::lambda_table_lock_ = nullptr;
 
 struct AllMutexData {
   // A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait).
@@ -946,6 +947,7 @@
     DCHECK(thread_suspend_count_lock_ != nullptr);
     DCHECK(trace_lock_ != nullptr);
     DCHECK(unexpected_signal_lock_ != nullptr);
+    DCHECK(lambda_table_lock_ != nullptr);
   } else {
     // Create global locks in level order from highest lock level to lowest.
     LockLevel current_lock_level = kInstrumentEntrypointsLock;
@@ -1048,6 +1050,10 @@
     DCHECK(reference_queue_soft_references_lock_ == nullptr);
     reference_queue_soft_references_lock_ = new Mutex("ReferenceQueue soft references lock", current_lock_level);
 
+    UPDATE_CURRENT_LOCK_LEVEL(kLambdaTableLock);
+    DCHECK(lambda_table_lock_ == nullptr);
+    lambda_table_lock_ = new Mutex("lambda table lock", current_lock_level);
+
     UPDATE_CURRENT_LOCK_LEVEL(kAbortLock);
     DCHECK(abort_lock_ == nullptr);
     abort_lock_ = new Mutex("abort lock", current_lock_level, true);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index f87467a..5b258e5 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -60,6 +60,7 @@
   kUnexpectedSignalLock,
   kThreadSuspendCountLock,
   kAbortLock,
+  kLambdaTableLock,
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
   kTransactionLogLock,
@@ -648,6 +649,10 @@
 
   // Have an exclusive logging thread.
   static Mutex* logging_lock_ ACQUIRED_AFTER(unexpected_signal_lock_);
+
+  // Allow reader-writer mutual exclusion on the boxed table of lambda objects.
+  // TODO: this should be a RW mutex lock, except that ConditionVariables don't work with it.
+  static Mutex* lambda_table_lock_ ACQUIRED_AFTER(mutator_lock_);
 };
 
 }  // namespace art
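
The new kLambdaTableLock slots into ART's leveled-lock discipline: every mutex carries a level, the global locks are created from the highest level down, and a thread may only acquire a lock at a lower level than anything it already holds, which rules out ordering cycles. A simplified illustration of that invariant (invented levels, not ART's Mutex):

#include <cassert>
#include <vector>

enum LockLevel { kAbortLock = 0, kLambdaTableLock, kMutatorLock };

struct Thread {
  std::vector<LockLevel> held;
  void Acquire(LockLevel level) {
    // Locks must be taken in strictly decreasing level order.
    assert(held.empty() || level < held.back());
    held.push_back(level);
  }
  void Release() { held.pop_back(); }
};

int main() {
  Thread t;
  t.Acquire(kMutatorLock);
  t.Acquire(kLambdaTableLock);  // OK: below the mutator lock, per ACQUIRED_AFTER.
  t.Acquire(kAbortLock);        // OK: the lowest level comes last.
  return 0;
}
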
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 45fb9c4..0ae32f4 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -1206,6 +1206,8 @@
       const_cast<char*>(copy->StartRedZone())[i] = kCanary[j];
       if (kCanary[j] == '\0') {
         j = 0;
+      } else {
+        j++;
       }
     }
 
@@ -1217,6 +1219,8 @@
       const_cast<char*>(copy->EndRedZone())[i] = kCanary[j];
       if (kCanary[j] == '\0') {
         j = 0;
+      } else {
+        j++;
       }
     }
 
@@ -1367,6 +1371,8 @@
       }
       if (kCanary[j] == '\0') {
         j = 0;
+      } else {
+        j++;
       }
     }
 
@@ -1381,6 +1387,8 @@
       }
       if (kCanary[j] == '\0') {
         j = 0;
+      } else {
+        j++;
       }
     }
     return true;
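
The four hunks above fix the same bug: j was reset at the terminating NUL but never advanced otherwise, so the red zone was filled with (and later checked against) only the first canary byte. A standalone sketch of the corrected fill loop, with an illustrative canary string:

#include <cassert>
#include <cstddef>

static const char kCanary[] = "JNI BUG";  // Illustrative value.

void FillRedZone(char* zone, size_t len) {
  size_t j = 0;
  for (size_t i = 0; i < len; ++i) {
    zone[i] = kCanary[j];
    if (kCanary[j] == '\0') {
      j = 0;   // Wrap after the terminating NUL...
    } else {
      j++;     // ...and otherwise advance (the fix in the diff).
    }
  }
}

int main() {
  char zone[16];
  FillRedZone(zone, sizeof(zone));
  assert(zone[0] == 'J' && zone[1] == 'N');  // The pattern now actually cycles.
  assert(zone[8] == 'J');  // "JNI BUG" plus its NUL is 8 bytes, then it restarts.
  return 0;
}
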
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 0694227..8f7862a 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -55,6 +55,7 @@
 #include "linear_alloc.h"
 #include "oat.h"
 #include "oat_file.h"
+#include "oat_file-inl.h"
 #include "oat_file_assistant.h"
 #include "object_lock.h"
 #include "mirror/class.h"
@@ -194,7 +195,9 @@
   bool operator() (const FieldGap& lhs, const FieldGap& rhs)
       NO_THREAD_SAFETY_ANALYSIS {
     // Sort by gap size, largest first. Secondary sort by starting offset.
-    return lhs.size > rhs.size || (lhs.size == rhs.size && lhs.start_offset < rhs.start_offset);
+    // Note that the priority queue returns the largest element, so operator()
+    // should return true if lhs is less than rhs.
+    return lhs.size < rhs.size || (lhs.size == rhs.size && lhs.start_offset > rhs.start_offset);
   }
 };
 typedef std::priority_queue<FieldGap, std::vector<FieldGap>, FieldGapsComparator> FieldGaps;
@@ -248,13 +251,13 @@
     if (!gaps->empty() && gaps->top().size >= n) {
       FieldGap gap = gaps->top();
       gaps->pop();
-      DCHECK(IsAligned<n>(gap.start_offset));
+      DCHECK_ALIGNED(gap.start_offset, n);
       field->SetOffset(MemberOffset(gap.start_offset));
       if (gap.size > n) {
         AddFieldGap(gap.start_offset + n, gap.start_offset + gap.size, gaps);
       }
     } else {
-      DCHECK(IsAligned<n>(field_offset->Uint32Value()));
+      DCHECK_ALIGNED(field_offset->Uint32Value(), n);
       field->SetOffset(*field_offset);
       *field_offset = MemberOffset(field_offset->Uint32Value() + n);
     }
@@ -3037,6 +3040,18 @@
     mirror::Class::SetStatus(klass, mirror::Class::kStatusVerifyingAtRuntime, self);
   }
 
+  // Skip verification if we are forcing a soft fail.
+  // This has to be before the normal verification enabled check,
+  // since technically verification is disabled in this mode.
+  if (UNLIKELY(Runtime::Current()->IsVerificationSoftFail())) {
+    // Force verification to be a 'soft failure'.
+    mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
+    // As this is a fake verified status, make sure the methods are _not_ marked preverified
+    // later.
+    klass->SetPreverified();
+    return;
+  }
+
   // Skip verification if disabled.
   if (!Runtime::Current()->IsVerificationEnabled()) {
     mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
@@ -5173,7 +5188,7 @@
       field_offset = MemberOffset(RoundUp(field_offset.Uint32Value(), 4));
       AddFieldGap(old_offset.Uint32Value(), field_offset.Uint32Value(), &gaps);
     }
-    DCHECK(IsAligned<sizeof(mirror::HeapReference<mirror::Object>)>(field_offset.Uint32Value()));
+    DCHECK_ALIGNED(field_offset.Uint32Value(), sizeof(mirror::HeapReference<mirror::Object>));
     grouped_and_sorted_fields.pop_front();
     num_reference_fields++;
     field->SetOffset(field_offset);
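
The comparator fix above hinges on std::priority_queue semantics: it is a max-heap whose comparator must implement "lhs is less than rhs". The old code returned the "greater" relation, so top() silently yielded the smallest gap instead of the largest. A standalone demonstration of the corrected ordering:

#include <cassert>
#include <cstdint>
#include <queue>
#include <vector>

struct FieldGap { uint32_t start_offset; uint32_t size; };

struct FieldGapsComparator {
  bool operator()(const FieldGap& lhs, const FieldGap& rhs) const {
    // "Less-than": smaller gaps (and, on ties, later offsets) sort lower,
    // so top() yields the largest gap with the earliest start offset.
    return lhs.size < rhs.size ||
        (lhs.size == rhs.size && lhs.start_offset > rhs.start_offset);
  }
};

int main() {
  std::priority_queue<FieldGap, std::vector<FieldGap>, FieldGapsComparator> gaps;
  gaps.push({8, 2});
  gaps.push({4, 4});
  gaps.push({12, 4});
  assert(gaps.top().size == 4);          // Largest gap first...
  assert(gaps.top().start_offset == 4);  // ...earliest offset on a size tie.
  return 0;
}
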
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index e4f7b7a..b60cba4 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -406,6 +406,9 @@
   const void* GetOatMethodQuickCodeFor(ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  const OatFile::OatMethod FindOatMethodFor(ArtMethod* method, bool* found)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   pid_t GetClassesLockOwner();  // For SignalCatcher.
   pid_t GetDexLockOwner();  // For SignalCatcher.
 
@@ -484,9 +487,6 @@
   void DropFindArrayClassCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
-  const OatFile::OatMethod FindOatMethodFor(ArtMethod* method, bool* found)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   OatFile& GetImageOatFile(gc::space::ImageSpace* space)
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 97d170e..eccebf1 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -2100,6 +2100,7 @@
     case kWaitingInMainDebuggerLoop:
     case kWaitingInMainSignalCatcherLoop:
     case kWaitingPerformingGc:
+    case kWaitingWeakRootRead:
     case kWaiting:
       return JDWP::TS_WAIT;
       // Don't add a 'default' here so the compiler can spot incompatible enum changes.
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 0ddbf7c..df2d379 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -488,6 +488,12 @@
   // Returns true if the instruction allows control flow to go to the following instruction.
   bool CanFlowThrough() const;
 
+  // Returns true if the instruction is a quickened instruction.
+  bool IsQuickened() const {
+    return (kInstructionIndexTypes[Opcode()] == kIndexFieldOffset) ||
+        (kInstructionIndexTypes[Opcode()] == kIndexVtableOffset);
+  }
+
   // Returns true if this instruction is a switch.
   bool IsSwitch() const {
     return (kInstructionFlags[Opcode()] & kSwitch) != 0;
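
IsQuickened() above is a table-driven classification: each opcode's index type lives in a parallel table, and quickened instructions are exactly those indexed by a field offset or a vtable offset. A toy standalone version with an invented opcode set (the real tables cover the full dex instruction set):

#include <cassert>

enum IndexType { kIndexNone, kIndexMethodRef, kIndexFieldOffset, kIndexVtableOffset };
enum Opcode { NOP, INVOKE_VIRTUAL, IGET_QUICK, INVOKE_VIRTUAL_QUICK, kNumOpcodes };

static const IndexType kInstructionIndexTypes[kNumOpcodes] = {
  kIndexNone,          // NOP
  kIndexMethodRef,     // INVOKE_VIRTUAL
  kIndexFieldOffset,   // IGET_QUICK
  kIndexVtableOffset,  // INVOKE_VIRTUAL_QUICK
};

bool IsQuickened(Opcode op) {
  return kInstructionIndexTypes[op] == kIndexFieldOffset ||
         kInstructionIndexTypes[op] == kIndexVtableOffset;
}

int main() {
  assert(!IsQuickened(INVOKE_VIRTUAL));
  assert(IsQuickened(IGET_QUICK));
  assert(IsQuickened(INVOKE_VIRTUAL_QUICK));
  return 0;
}
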
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index bc3ba21..de4b3f4 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -93,7 +93,7 @@
 
     // NOTE: Don't align the code (it will not be executed) but check that the Thumb2
     // adjustment will be a NOP, see ArtMethod::EntryPointToCodePointer().
-    CHECK_EQ(mapping_table_offset & 1u, 0u);
+    CHECK_ALIGNED(mapping_table_offset, 2);
     const uint8_t* code_ptr = &fake_header_code_and_maps_[gc_map_offset];
 
     method_f_ = my_klass_->FindVirtualMethod("f", "()I", sizeof(void*));
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 5f91566..47f9b1b 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -331,7 +331,7 @@
   // If we don't have a potential method, we're outta here.
   VLOG(signals) << "potential method: " << method_obj;
   // TODO: Check linear alloc and image.
-  DCHECK(IsAligned<sizeof(void*)>(ArtMethod::ObjectSize(sizeof(void*))))
+  DCHECK_ALIGNED(ArtMethod::ObjectSize(sizeof(void*)), sizeof(void*))
       << "ArtMethod is not pointer aligned";
   if (method_obj == nullptr || !IsAligned<sizeof(void*)>(method_obj)) {
     VLOG(signals) << "no method";
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index cd3f910..009254b 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -21,16 +21,11 @@
 #include "base/stl_util.h"
 #include "bitmap-inl.h"
 #include "card_table-inl.h"
-#include "heap_bitmap.h"
 #include "gc/accounting/space_bitmap-inl.h"
-#include "gc/collector/mark_sweep.h"
-#include "gc/collector/mark_sweep-inl.h"
 #include "gc/heap.h"
-#include "gc/space/space.h"
 #include "gc/space/image_space.h"
+#include "gc/space/space.h"
 #include "mirror/object-inl.h"
-#include "mirror/class-inl.h"
-#include "mirror/object_array-inl.h"
 #include "space_bitmap-inl.h"
 #include "thread.h"
 
@@ -95,11 +90,11 @@
 
 class ModUnionUpdateObjectReferencesVisitor {
  public:
-  ModUnionUpdateObjectReferencesVisitor(MarkHeapReferenceCallback* callback, void* arg,
+  ModUnionUpdateObjectReferencesVisitor(MarkObjectVisitor* visitor,
                                         space::ContinuousSpace* from_space,
                                         space::ContinuousSpace* immune_space,
                                         bool* contains_reference_to_other_space)
-    : callback_(callback), arg_(arg), from_space_(from_space), immune_space_(immune_space),
+    : visitor_(visitor), from_space_(from_space), immune_space_(immune_space),
       contains_reference_to_other_space_(contains_reference_to_other_space) {
   }
 
@@ -111,13 +106,12 @@
     mirror::Object* ref = obj_ptr->AsMirrorPtr();
     if (ref != nullptr && !from_space_->HasAddress(ref) && !immune_space_->HasAddress(ref)) {
       *contains_reference_to_other_space_ = true;
-      callback_(obj_ptr, arg_);
+      visitor_->MarkHeapReference(obj_ptr);
     }
   }
 
  private:
-  MarkHeapReferenceCallback* const callback_;
-  void* const arg_;
+  MarkObjectVisitor* const visitor_;
   // Space which we are scanning
   space::ContinuousSpace* const from_space_;
   space::ContinuousSpace* const immune_space_;
@@ -129,25 +123,24 @@
  public:
   // Immune space is any other space which we don't care about references to. Currently this is
   // the image space in the case of the zygote mod union table.
-  ModUnionScanImageRootVisitor(MarkHeapReferenceCallback* callback, void* arg,
+  ModUnionScanImageRootVisitor(MarkObjectVisitor* visitor,
                                space::ContinuousSpace* from_space,
                                space::ContinuousSpace* immune_space,
                                bool* contains_reference_to_other_space)
-      : callback_(callback), arg_(arg), from_space_(from_space), immune_space_(immune_space),
+      : visitor_(visitor), from_space_(from_space), immune_space_(immune_space),
         contains_reference_to_other_space_(contains_reference_to_other_space) {}
 
   void operator()(Object* root) const
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(root != nullptr);
-    ModUnionUpdateObjectReferencesVisitor ref_visitor(callback_, arg_, from_space_, immune_space_,
+    ModUnionUpdateObjectReferencesVisitor ref_visitor(visitor_, from_space_, immune_space_,
                                                       contains_reference_to_other_space_);
     root->VisitReferences<kMovingClasses>(ref_visitor, VoidFunctor());
   }
 
  private:
-  MarkHeapReferenceCallback* const callback_;
-  void* const arg_;
+  MarkObjectVisitor* const visitor_;
   // Space which we are scanning
   space::ContinuousSpace* const from_space_;
   space::ContinuousSpace* const immune_space_;
@@ -305,8 +298,7 @@
   }
 }
 
-void ModUnionTableReferenceCache::UpdateAndMarkReferences(MarkHeapReferenceCallback* callback,
-                                                          void* arg) {
+void ModUnionTableReferenceCache::UpdateAndMarkReferences(MarkObjectVisitor* visitor) {
   CardTable* card_table = heap_->GetCardTable();
 
   std::vector<mirror::HeapReference<Object>*> cards_references;
@@ -338,7 +330,7 @@
   size_t count = 0;
   for (const auto& ref : references_) {
     for (mirror::HeapReference<Object>* obj_ptr : ref.second) {
-      callback(obj_ptr, arg);
+      visitor->MarkHeapReference(obj_ptr);
     }
     count += ref.second.size();
   }
@@ -362,9 +354,9 @@
 
 class CardBitVisitor {
  public:
-  CardBitVisitor(MarkHeapReferenceCallback* callback, void* arg, space::ContinuousSpace* space,
+  CardBitVisitor(MarkObjectVisitor* visitor, space::ContinuousSpace* space,
                  space::ContinuousSpace* immune_space, ModUnionTable::CardBitmap* card_bitmap)
-      : callback_(callback), arg_(arg), space_(space), immune_space_(immune_space),
+      : visitor_(visitor), space_(space), immune_space_(immune_space),
         bitmap_(space->GetLiveBitmap()), card_bitmap_(card_bitmap) {
     DCHECK(immune_space_ != nullptr);
   }
@@ -374,7 +366,7 @@
     DCHECK(space_->HasAddress(reinterpret_cast<mirror::Object*>(start)))
         << start << " " << *space_;
     bool reference_to_other_space = false;
-    ModUnionScanImageRootVisitor scan_visitor(callback_, arg_, space_, immune_space_,
+    ModUnionScanImageRootVisitor scan_visitor(visitor_, space_, immune_space_,
                                               &reference_to_other_space);
     bitmap_->VisitMarkedRange(start, start + CardTable::kCardSize, scan_visitor);
     if (!reference_to_other_space) {
@@ -384,8 +376,7 @@
   }
 
  private:
-  MarkHeapReferenceCallback* const callback_;
-  void* const arg_;
+  MarkObjectVisitor* const visitor_;
   space::ContinuousSpace* const space_;
   space::ContinuousSpace* const immune_space_;
   ContinuousSpaceBitmap* const bitmap_;
@@ -400,15 +391,14 @@
 }
 
 // Mark all references to the alloc space(s).
-void ModUnionTableCardCache::UpdateAndMarkReferences(MarkHeapReferenceCallback* callback,
-                                                     void* arg) {
+void ModUnionTableCardCache::UpdateAndMarkReferences(MarkObjectVisitor* visitor) {
   auto* image_space = heap_->GetImageSpace();
   // If we don't have an image space, just pass in space_ as the immune space. Pass in the same
   // space_ instead of image_space to avoid a null check in ModUnionUpdateObjectReferencesVisitor.
-  CardBitVisitor visitor(callback, arg, space_, image_space != nullptr ? image_space : space_,
+  CardBitVisitor bit_visitor(visitor, space_, image_space != nullptr ? image_space : space_,
       card_bitmap_.get());
   card_bitmap_->VisitSetBits(
-      0, RoundUp(space_->Size(), CardTable::kCardSize) / CardTable::kCardSize, visitor);
+      0, RoundUp(space_->Size(), CardTable::kCardSize) / CardTable::kCardSize, bit_visitor);
 }
 
 void ModUnionTableCardCache::Dump(std::ostream& os) {
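
The refactor running through this file replaces a C-style callback plus void* pair with an abstract MarkObjectVisitor, which restores type safety and lets one object carry all the state the callback's arg used to smuggle through reinterpret_cast. A generic standalone sketch of the before/after shape (simplified stand-ins, not the ART types):

#include <cassert>
#include <set>

struct Object {};

// Before: void (*MarkHeapReferenceCallback)(Object** ref, void* arg);
// After:
class MarkObjectVisitor {
 public:
  virtual ~MarkObjectVisitor() {}
  virtual void MarkHeapReference(Object** ref) = 0;
};

class CollectingVisitor : public MarkObjectVisitor {
 public:
  explicit CollectingVisitor(std::set<Object*>* out) : out_(out) {}
  void MarkHeapReference(Object** ref) override { out_->insert(*ref); }
 private:
  std::set<Object*>* const out_;
};

void UpdateAndMarkReferences(Object** refs, int n, MarkObjectVisitor* visitor) {
  for (int i = 0; i < n; ++i) {
    visitor->MarkHeapReference(&refs[i]);  // No reinterpret_cast of a void* arg.
  }
}

int main() {
  Object a, b;
  Object* refs[] = {&a, &b};
  std::set<Object*> seen;
  CollectingVisitor v(&seen);
  UpdateAndMarkReferences(refs, 2, &v);
  assert(seen.size() == 2);
  return 0;
}
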
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index 2e232ca..520cc1c 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -76,7 +76,7 @@
   // Update the mod-union table using data stored by ClearCards. There may be multiple ClearCards
   // before a call to update, for example, back-to-back sticky GCs. Also mark references to other
   // spaces which are stored in the mod-union table.
-  virtual void UpdateAndMarkReferences(MarkHeapReferenceCallback* callback, void* arg) = 0;
+  virtual void UpdateAndMarkReferences(MarkObjectVisitor* visitor) = 0;
 
   // Verification: sanity checks that we don't have clean cards which conflict with our cached data
   // for said cards. Exclusive lock is required since verify sometimes uses
@@ -117,7 +117,7 @@
   void ClearCards() OVERRIDE;
 
   // Update table based on cleared cards and mark all references to the other spaces.
-  void UpdateAndMarkReferences(MarkHeapReferenceCallback* callback, void* arg) OVERRIDE
+  void UpdateAndMarkReferences(MarkObjectVisitor* visitor) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -157,7 +157,7 @@
   virtual void ClearCards() OVERRIDE;
 
   // Mark all references to the alloc space(s).
-  virtual void UpdateAndMarkReferences(MarkHeapReferenceCallback* callback, void* arg) OVERRIDE
+  virtual void UpdateAndMarkReferences(MarkObjectVisitor* visitor) OVERRIDE
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/gc/accounting/mod_union_table_test.cc b/runtime/gc/accounting/mod_union_table_test.cc
index 363b76a..aad8a25 100644
--- a/runtime/gc/accounting/mod_union_table_test.cc
+++ b/runtime/gc/accounting/mod_union_table_test.cc
@@ -93,12 +93,24 @@
 };
 
 // Collect visited objects into container.
-static void CollectVisitedCallback(mirror::HeapReference<mirror::Object>* ref, void* arg)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(ref != nullptr);
-  DCHECK(arg != nullptr);
-  reinterpret_cast<std::set<mirror::Object*>*>(arg)->insert(ref->AsMirrorPtr());
-}
+class CollectVisitedVisitor : public MarkObjectVisitor {
+ public:
+  explicit CollectVisitedVisitor(std::set<mirror::Object*>* out) : out_(out) {}
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* ref) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(ref != nullptr);
+    MarkObject(ref->AsMirrorPtr());
+  }
+  virtual mirror::Object* MarkObject(mirror::Object* obj) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(obj != nullptr);
+    out_->insert(obj);
+    return obj;
+  }
+
+ private:
+  std::set<mirror::Object*>* const out_;
+};
 
 // A mod union table that only holds references to a specified target space.
 class ModUnionTableRefCacheToSpace : public ModUnionTableReferenceCache {
@@ -199,7 +211,8 @@
   obj2->Set(3, other_space_ref2);
   table->ClearCards();
   std::set<mirror::Object*> visited_before;
-  table->UpdateAndMarkReferences(&CollectVisitedCallback, &visited_before);
+  CollectVisitedVisitor collector_before(&visited_before);
+  table->UpdateAndMarkReferences(&collector_before);
   // Check that we visited all the references in other spaces only.
   ASSERT_GE(visited_before.size(), 2u);
   ASSERT_TRUE(visited_before.find(other_space_ref1) != visited_before.end());
@@ -230,7 +243,8 @@
   }
   // Visit again and make sure the cards got cleared back to their sane state.
   std::set<mirror::Object*> visited_after;
-  table->UpdateAndMarkReferences(&CollectVisitedCallback, &visited_after);
+  CollectVisitedVisitor collector_after(&visited_after);
+  table->UpdateAndMarkReferences(&collector_after);
   // Check that we visited a superset after.
   for (auto* obj : visited_before) {
     ASSERT_TRUE(visited_after.find(obj) != visited_after.end()) << obj;
diff --git a/runtime/gc/accounting/read_barrier_table.h b/runtime/gc/accounting/read_barrier_table.h
index 436df92..86266e2 100644
--- a/runtime/gc/accounting/read_barrier_table.h
+++ b/runtime/gc/accounting/read_barrier_table.h
@@ -51,8 +51,8 @@
   void Clear(uint8_t* start_addr, uint8_t* end_addr) {
     DCHECK(IsValidHeapAddr(start_addr)) << start_addr;
     DCHECK(IsValidHeapAddr(end_addr)) << end_addr;
-    DCHECK(IsAligned<kRegionSize>(start_addr));
-    DCHECK(IsAligned<kRegionSize>(end_addr));
+    DCHECK_ALIGNED(start_addr, kRegionSize);
+    DCHECK_ALIGNED(end_addr, kRegionSize);
     uint8_t* entry_start = EntryFromAddr(start_addr);
     uint8_t* entry_end = EntryFromAddr(end_addr);
     memset(reinterpret_cast<void*>(entry_start), 0, entry_end - entry_start);
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index eeb385e..23ab8df 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -61,11 +61,10 @@
 
 class RememberedSetReferenceVisitor {
  public:
-  RememberedSetReferenceVisitor(MarkHeapReferenceCallback* callback,
-                                DelayReferenceReferentCallback* ref_callback,
-                                space::ContinuousSpace* target_space,
-                                bool* const contains_reference_to_target_space, void* arg)
-      : callback_(callback), ref_callback_(ref_callback), target_space_(target_space), arg_(arg),
+  RememberedSetReferenceVisitor(space::ContinuousSpace* target_space,
+                                bool* const contains_reference_to_target_space,
+                                collector::GarbageCollector* collector)
+      : collector_(collector), target_space_(target_space),
         contains_reference_to_target_space_(contains_reference_to_target_space) {}
 
   void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */) const
@@ -74,7 +73,7 @@
     mirror::HeapReference<mirror::Object>* ref_ptr = obj->GetFieldObjectReferenceAddr(offset);
     if (target_space_->HasAddress(ref_ptr->AsMirrorPtr())) {
       *contains_reference_to_target_space_ = true;
-      callback_(ref_ptr, arg_);
+      collector_->MarkHeapReference(ref_ptr);
       DCHECK(!target_space_->HasAddress(ref_ptr->AsMirrorPtr()));
     }
   }
@@ -84,49 +83,43 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     if (target_space_->HasAddress(ref->GetReferent())) {
       *contains_reference_to_target_space_ = true;
-      ref_callback_(klass, ref, arg_);
+      collector_->DelayReferenceReferent(klass, ref);
     }
   }
 
  private:
-  MarkHeapReferenceCallback* const callback_;
-  DelayReferenceReferentCallback* const ref_callback_;
+  collector::GarbageCollector* const collector_;
   space::ContinuousSpace* const target_space_;
-  void* const arg_;
   bool* const contains_reference_to_target_space_;
 };
 
 class RememberedSetObjectVisitor {
  public:
-  RememberedSetObjectVisitor(MarkHeapReferenceCallback* callback,
-                             DelayReferenceReferentCallback* ref_callback,
-                             space::ContinuousSpace* target_space,
-                             bool* const contains_reference_to_target_space, void* arg)
-      : callback_(callback), ref_callback_(ref_callback), target_space_(target_space), arg_(arg),
+  RememberedSetObjectVisitor(space::ContinuousSpace* target_space,
+                             bool* const contains_reference_to_target_space,
+                             collector::GarbageCollector* collector)
+      : collector_(collector), target_space_(target_space),
         contains_reference_to_target_space_(contains_reference_to_target_space) {}
 
   void operator()(mirror::Object* obj) const EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    RememberedSetReferenceVisitor visitor(callback_, ref_callback_, target_space_,
-                                          contains_reference_to_target_space_, arg_);
+    RememberedSetReferenceVisitor visitor(target_space_, contains_reference_to_target_space_,
+                                          collector_);
     obj->VisitReferences<kMovingClasses>(visitor, visitor);
   }
 
  private:
-  MarkHeapReferenceCallback* const callback_;
-  DelayReferenceReferentCallback* const ref_callback_;
+  collector::GarbageCollector* const collector_;
   space::ContinuousSpace* const target_space_;
-  void* const arg_;
   bool* const contains_reference_to_target_space_;
 };
 
-void RememberedSet::UpdateAndMarkReferences(MarkHeapReferenceCallback* callback,
-                                            DelayReferenceReferentCallback* ref_callback,
-                                            space::ContinuousSpace* target_space, void* arg) {
+void RememberedSet::UpdateAndMarkReferences(space::ContinuousSpace* target_space,
+                                            collector::GarbageCollector* collector) {
   CardTable* card_table = heap_->GetCardTable();
   bool contains_reference_to_target_space = false;
-  RememberedSetObjectVisitor obj_visitor(callback, ref_callback, target_space,
-                                         &contains_reference_to_target_space, arg);
+  RememberedSetObjectVisitor obj_visitor(target_space, &contains_reference_to_target_space,
+                                         collector);
   ContinuousSpaceBitmap* bitmap = space_->GetLiveBitmap();
   CardSet remove_card_set;
   for (uint8_t* const card_addr : dirty_cards_) {
diff --git a/runtime/gc/accounting/remembered_set.h b/runtime/gc/accounting/remembered_set.h
index c51e26d..affe863 100644
--- a/runtime/gc/accounting/remembered_set.h
+++ b/runtime/gc/accounting/remembered_set.h
@@ -29,6 +29,7 @@
 namespace gc {
 
 namespace collector {
+  class GarbageCollector;
   class MarkSweep;
 }  // namespace collector
 namespace space {
@@ -53,9 +54,8 @@
   void ClearCards();
 
   // Mark through all references to the target space.
-  void UpdateAndMarkReferences(MarkHeapReferenceCallback* callback,
-                               DelayReferenceReferentCallback* ref_callback,
-                               space::ContinuousSpace* target_space, void* arg)
+  void UpdateAndMarkReferences(space::ContinuousSpace* target_space,
+                               collector::GarbageCollector* collector)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 6546eb4..cdeaa50 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -79,7 +79,7 @@
 
 template<size_t kAlignment>
 void SpaceBitmap<kAlignment>::SetHeapLimit(uintptr_t new_end) {
-  DCHECK(IsAligned<kBitsPerIntPtrT * kAlignment>(new_end));
+  DCHECK_ALIGNED(new_end, kBitsPerIntPtrT * kAlignment);
   size_t new_size = OffsetToIndex(new_end - heap_begin_) * sizeof(intptr_t);
   if (new_size < bitmap_size_) {
     bitmap_size_ = new_size;
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 35faff3..e0661b6 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -30,6 +30,7 @@
 namespace art {
 
 namespace mirror {
+  class Class;
   class Object;
 }  // namespace mirror
 class MemMap;
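
Adding the mirror::Class forward declaration follows the include-hygiene theme of this change (see also the include trimming in mod_union_table.cc): a header that only names a type through pointers or references can forward-declare it rather than pull in its full definition, cutting rebuild fan-out. A small illustration with hypothetical names:

namespace mirror {
  class Class;   // Forward declaration: no full definition required.
  class Object;
}  // namespace mirror

class SpaceBitmapLike {
 public:
  void Walk(mirror::Object* obj);  // Pointer parameters only need the declaration.
  // Taking or returning mirror::Class by value would require the full definition.
};

int main() { return 0; }
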
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index 88c475b..3108b7c 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -110,23 +110,24 @@
   }
 }
 
-static inline void SweepClassObject(AllocRecord* record, IsMarkedCallback* callback, void* arg)
+static inline void SweepClassObject(AllocRecord* record, IsMarkedVisitor* visitor)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
     EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
   GcRoot<mirror::Class>& klass = record->GetClassGcRoot();
   // This does not need a read barrier because this is called by GC.
   mirror::Object* old_object = klass.Read<kWithoutReadBarrier>();
-  // The class object can become null if we implement class unloading.
-  // In that case we might still want to keep the class name string (not implemented).
-  mirror::Object* new_object = UNLIKELY(old_object == nullptr) ?
-      nullptr : callback(old_object, arg);
-  if (UNLIKELY(old_object != new_object)) {
-    mirror::Class* new_klass = UNLIKELY(new_object == nullptr) ? nullptr : new_object->AsClass();
-    klass = GcRoot<mirror::Class>(new_klass);
+  if (old_object != nullptr) {
+    // The class object can become null if we implement class unloading.
+    // In that case we might still want to keep the class name string (not implemented).
+    mirror::Object* new_object = visitor->IsMarked(old_object);
+    DCHECK(new_object != nullptr);
+    if (UNLIKELY(old_object != new_object)) {
+      klass = GcRoot<mirror::Class>(new_object->AsClass());
+    }
   }
 }
 
-void AllocRecordObjectMap::SweepAllocationRecords(IsMarkedCallback* callback, void* arg) {
+void AllocRecordObjectMap::SweepAllocationRecords(IsMarkedVisitor* visitor) {
   VLOG(heap) << "Start SweepAllocationRecords()";
   size_t count_deleted = 0, count_moved = 0, count = 0;
   // Only the first (size - recent_record_max_) number of records can be deleted.
@@ -141,11 +142,11 @@
     // This does not need a read barrier because this is called by GC.
     mirror::Object* old_object = it->first.Read<kWithoutReadBarrier>();
     AllocRecord* record = it->second;
-    mirror::Object* new_object = old_object == nullptr ? nullptr : callback(old_object, arg);
+    mirror::Object* new_object = old_object == nullptr ? nullptr : visitor->IsMarked(old_object);
     if (new_object == nullptr) {
       if (count > delete_bound) {
         it->first = GcRoot<mirror::Object>(nullptr);
-        SweepClassObject(record, callback, arg);
+        SweepClassObject(record, visitor);
         ++it;
       } else {
         delete record;
@@ -157,7 +158,7 @@
         it->first = GcRoot<mirror::Object>(new_object);
         ++count_moved;
       }
-      SweepClassObject(record, callback, arg);
+      SweepClassObject(record, visitor);
       ++it;
     }
   }
diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h
index 06721c8..933363b 100644
--- a/runtime/gc/allocation_record.h
+++ b/runtime/gc/allocation_record.h
@@ -261,7 +261,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
 
-  void SweepAllocationRecords(IsMarkedCallback* callback, void* arg)
+  void SweepAllocationRecords(IsMarkedVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
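
SweepAllocationRecords() above follows the standard weak-root sweep contract: ask the collector's IsMarked() for each weak root; nullptr means the object died, a different pointer means it moved and the root must be updated. A hedged standalone sketch of the same pattern over a plain map (simplified types, not the ART ones; IsMarked is idempotent here, so revisiting a re-keyed entry is harmless):

#include <cassert>
#include <map>

struct Object {};

class IsMarkedVisitor {
 public:
  virtual ~IsMarkedVisitor() {}
  // Returns nullptr if dead, else the (possibly moved) address.
  virtual Object* IsMarked(Object* obj) = 0;
};

void SweepRecords(std::map<Object*, int>* records, IsMarkedVisitor* visitor) {
  for (auto it = records->begin(); it != records->end();) {
    Object* new_obj = visitor->IsMarked(it->first);
    if (new_obj == nullptr) {
      it = records->erase(it);    // Dead: drop the record.
    } else if (new_obj != it->first) {
      int value = it->second;     // Moved: re-key the entry.
      it = records->erase(it);
      records->emplace(new_obj, value);
    } else {
      ++it;                       // Alive and in place.
    }
  }
}

struct EverythingDead : IsMarkedVisitor {
  Object* IsMarked(Object*) override { return nullptr; }
};

int main() {
  Object a;
  std::map<Object*, int> records{{&a, 1}};
  EverythingDead d;
  SweepRecords(&records, &d);
  assert(records.empty());
  return 0;
}
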
 
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index bd10f7b..abaa97f 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -63,7 +63,7 @@
   DCHECK_EQ(RoundUp(capacity, kPageSize), capacity);
   DCHECK_EQ(RoundUp(max_capacity, kPageSize), max_capacity);
   CHECK_LE(capacity, max_capacity);
-  CHECK(IsAligned<kPageSize>(page_release_size_threshold_));
+  CHECK_ALIGNED(page_release_size_threshold_, kPageSize);
   if (!initialized_) {
     Initialize();
   }
@@ -349,7 +349,7 @@
     fpr->magic_num_ = kMagicNumFree;
   }
   fpr->SetByteSize(this, byte_size);
-  DCHECK(IsAligned<kPageSize>(fpr->ByteSize(this)));
+  DCHECK_ALIGNED(fpr->ByteSize(this), kPageSize);
 
   DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
   if (!free_page_runs_.empty()) {
@@ -1567,7 +1567,7 @@
         FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
         DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
         size_t fpr_size = fpr->ByteSize(this);
-        DCHECK(IsAligned<kPageSize>(fpr_size));
+        DCHECK_ALIGNED(fpr_size, kPageSize);
         void* start = fpr;
         if (kIsDebugBuild) {
           // In the debug build, the first page of a free page run
@@ -1916,7 +1916,7 @@
           CHECK(free_page_runs_.find(fpr) != free_page_runs_.end())
               << "An empty page must belong to the free page run set";
           size_t fpr_size = fpr->ByteSize(this);
-          CHECK(IsAligned<kPageSize>(fpr_size))
+          CHECK_ALIGNED(fpr_size, kPageSize)
               << "A free page run size isn't page-aligned : " << fpr_size;
           size_t num_pages = fpr_size / kPageSize;
           CHECK_GT(num_pages, static_cast<uintptr_t>(0))
@@ -2163,7 +2163,7 @@
           // to the next page.
           if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
             size_t fpr_size = fpr->ByteSize(this);
-            DCHECK(IsAligned<kPageSize>(fpr_size));
+            DCHECK_ALIGNED(fpr_size, kPageSize);
             uint8_t* start = reinterpret_cast<uint8_t*>(fpr);
             reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
             size_t pages = fpr_size / kPageSize;
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 9316b27..c803655 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -73,6 +73,14 @@
   }
 }
 
+void ConcurrentCopying::MarkHeapReference(mirror::HeapReference<mirror::Object>* from_ref) {
+  // Used for preserving soft references. It should be OK not to have a CAS here, since there
+  // should be no other threads that can trigger read barriers on the same referent during
+  // reference processing.
+  from_ref->Assign(Mark(from_ref->AsMirrorPtr()));
+  DCHECK(!from_ref->IsNull());
+}
+
 ConcurrentCopying::~ConcurrentCopying() {
   STLDeleteElements(&pooled_mark_stacks_);
 }
@@ -308,7 +316,7 @@
   }
 
  private:
-  ConcurrentCopying* collector_;
+  ConcurrentCopying* const collector_;
 };
 
 class EmptyCheckpoint : public Closure {
@@ -429,7 +437,7 @@
       LOG(INFO) << "ProcessReferences";
     }
     // Process weak references. This may produce new refs to process and have them processed via
-    // ProcessMarkStackCallback (in the GC exclusive mark stack mode).
+    // ProcessMarkStack (in the GC exclusive mark stack mode).
     ProcessReferences(self);
     CheckEmptyMarkStack();
     if (kVerboseMode) {
@@ -644,7 +652,7 @@
   }
 
  private:
-  ConcurrentCopying* collector_;
+  ConcurrentCopying* const collector_;
 };
 
 class ConcurrentCopyingVerifyNoFromSpaceRefsObjectVisitor {
@@ -732,16 +740,9 @@
     }
     collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
   }
-  static void RootCallback(mirror::Object** root, void *arg, const RootInfo& /*root_info*/)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    ConcurrentCopying* collector = reinterpret_cast<ConcurrentCopying*>(arg);
-    ConcurrentCopyingAssertToSpaceInvariantRefsVisitor visitor(collector);
-    DCHECK(root != nullptr);
-    visitor(*root);
-  }
 
  private:
-  ConcurrentCopying* collector_;
+  ConcurrentCopying* const collector_;
 };
 
 class ConcurrentCopyingAssertToSpaceInvariantFieldVisitor {
@@ -762,7 +763,7 @@
   }
 
  private:
-  ConcurrentCopying* collector_;
+  ConcurrentCopying* const collector_;
 };
 
 class ConcurrentCopyingAssertToSpaceInvariantObjectVisitor {
@@ -785,7 +786,7 @@
   }
 
  private:
-  ConcurrentCopying* collector_;
+  ConcurrentCopying* const collector_;
 };
 
 class RevokeThreadLocalMarkStackCheckpoint : public Closure {
@@ -1088,7 +1089,7 @@
 void ConcurrentCopying::SweepSystemWeaks(Thread* self) {
   TimingLogger::ScopedTiming split("SweepSystemWeaks", GetTimings());
   ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  Runtime::Current()->SweepSystemWeaks(IsMarkedCallback, this);
+  Runtime::Current()->SweepSystemWeaks(this);
 }
 
 void ConcurrentCopying::Sweep(bool swap_bitmaps) {
@@ -1293,7 +1294,7 @@
   }
 
  private:
-  ConcurrentCopying* collector_;
+  ConcurrentCopying* const collector_;
 };
 
 // Compute how much live objects are left in regions.
@@ -1586,7 +1587,7 @@
 // Fill the given memory block with a dummy object. Used to fill in a
 // copy of an object that was lost in a race.
 void ConcurrentCopying::FillWithDummyObject(mirror::Object* dummy_obj, size_t byte_size) {
-  CHECK(IsAligned<kObjectAlignment>(byte_size));
+  CHECK_ALIGNED(byte_size, kObjectAlignment);
   memset(dummy_obj, 0, byte_size);
   mirror::Class* int_array_class = mirror::IntArray::GetArrayClass();
   CHECK(int_array_class != nullptr);
@@ -1619,7 +1620,7 @@
 // Reuse the memory blocks that were copies of objects lost in races.
 mirror::Object* ConcurrentCopying::AllocateInSkippedBlock(size_t alloc_size) {
   // Try to reuse the blocks that were unused due to CAS failures.
-  CHECK(IsAligned<space::RegionSpace::kAlignment>(alloc_size));
+  CHECK_ALIGNED(alloc_size, space::RegionSpace::kAlignment);
   Thread* self = Thread::Current();
   size_t min_object_size = RoundUp(sizeof(mirror::Object), space::RegionSpace::kAlignment);
   MutexLock mu(self, skipped_blocks_lock_);
@@ -1638,7 +1639,7 @@
         // Not found.
         return nullptr;
       }
-      CHECK(IsAligned<space::RegionSpace::kAlignment>(it->first - alloc_size));
+      CHECK_ALIGNED(it->first - alloc_size, space::RegionSpace::kAlignment);
       CHECK_GE(it->first - alloc_size, min_object_size)
           << "byte_size=" << byte_size << " it->first=" << it->first << " alloc_size=" << alloc_size;
     }
@@ -1649,7 +1650,7 @@
   uint8_t* addr = it->second;
   CHECK_GE(byte_size, alloc_size);
   CHECK(region_space_->IsInToSpace(reinterpret_cast<mirror::Object*>(addr)));
-  CHECK(IsAligned<space::RegionSpace::kAlignment>(byte_size));
+  CHECK_ALIGNED(byte_size, space::RegionSpace::kAlignment);
   if (kVerboseMode) {
     LOG(INFO) << "Reusing skipped bytes : " << reinterpret_cast<void*>(addr) << ", " << byte_size;
   }
@@ -1657,7 +1658,7 @@
   memset(addr, 0, byte_size);
   if (byte_size > alloc_size) {
     // Return the remainder to the map.
-    CHECK(IsAligned<space::RegionSpace::kAlignment>(byte_size - alloc_size));
+    CHECK_ALIGNED(byte_size - alloc_size, space::RegionSpace::kAlignment);
     CHECK_GE(byte_size - alloc_size, min_object_size);
     FillWithDummyObject(reinterpret_cast<mirror::Object*>(addr + alloc_size),
                         byte_size - alloc_size);
@@ -2029,14 +2030,9 @@
   heap_->ClearMarkedObjects();
 }
 
-mirror::Object* ConcurrentCopying::IsMarkedCallback(mirror::Object* from_ref, void* arg) {
-  return reinterpret_cast<ConcurrentCopying*>(arg)->IsMarked(from_ref);
-}
-
-bool ConcurrentCopying::IsHeapReferenceMarkedCallback(
-    mirror::HeapReference<mirror::Object>* field, void* arg) {
+bool ConcurrentCopying::IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* field) {
   mirror::Object* from_ref = field->AsMirrorPtr();
-  mirror::Object* to_ref = reinterpret_cast<ConcurrentCopying*>(arg)->IsMarked(from_ref);
+  mirror::Object* to_ref = IsMarked(from_ref);
   if (to_ref == nullptr) {
     return false;
   }
@@ -2048,18 +2044,12 @@
   return true;
 }
 
-mirror::Object* ConcurrentCopying::MarkCallback(mirror::Object* from_ref, void* arg) {
-  return reinterpret_cast<ConcurrentCopying*>(arg)->Mark(from_ref);
-}
-
-void ConcurrentCopying::ProcessMarkStackCallback(void* arg) {
-  ConcurrentCopying* concurrent_copying = reinterpret_cast<ConcurrentCopying*>(arg);
-  concurrent_copying->ProcessMarkStack();
+mirror::Object* ConcurrentCopying::MarkObject(mirror::Object* from_ref) {
+  return Mark(from_ref);
 }
 
 void ConcurrentCopying::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
-  heap_->GetReferenceProcessor()->DelayReferenceReferent(
-      klass, reference, &IsHeapReferenceMarkedCallback, this);
+  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference, this);
 }
 
 void ConcurrentCopying::ProcessReferences(Thread* self) {
@@ -2067,8 +2057,7 @@
   // We don't really need to lock the heap bitmap lock as we use CAS to mark in bitmaps.
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   GetHeap()->GetReferenceProcessor()->ProcessReferences(
-      true /*concurrent*/, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(),
-      &IsHeapReferenceMarkedCallback, &MarkCallback, &ProcessMarkStackCallback, this);
+      true /*concurrent*/, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(), this);
 }
 
 void ConcurrentCopying::RevokeAllThreadLocalBuffers() {
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 1fb4703..f1317b8 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -121,7 +121,7 @@
   void VerifyNoFromSpaceReferences() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   accounting::ObjectStack* GetAllocationStack();
   accounting::ObjectStack* GetLiveStack();
-  void ProcessMarkStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void ProcessMarkStack() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool ProcessMarkStackOnce() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ProcessMarkStackRef(mirror::Object* to_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   size_t ProcessThreadLocalMarkStacks(bool disable_weak_ref_access)
@@ -130,18 +130,16 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SwitchToSharedMarkStackMode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SwitchToGcExclusiveMarkStackMode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference)
+  virtual void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ProcessReferences(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::Object* IsMarked(mirror::Object* from_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static mirror::Object* MarkCallback(mirror::Object* from_ref, void* arg)
+  virtual mirror::Object* MarkObject(mirror::Object* from_ref) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static mirror::Object* IsMarkedCallback(mirror::Object* from_ref, void* arg)
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* from_ref) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static bool IsHeapReferenceMarkedCallback(
-      mirror::HeapReference<mirror::Object>* field, void* arg)
+  virtual mirror::Object* IsMarked(mirror::Object* from_ref) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void ProcessMarkStackCallback(void* arg)
+  virtual bool IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* field) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SweepSystemWeaks(Thread* self)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 9b76d1a..cfc4f96 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -17,6 +17,9 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_GARBAGE_COLLECTOR_H_
 #define ART_RUNTIME_GC_COLLECTOR_GARBAGE_COLLECTOR_H_
 
+#include <stdint.h>
+#include <vector>
+
 #include "base/histogram.h"
 #include "base/mutex.h"
 #include "base/timing_logger.h"
@@ -24,10 +27,16 @@
 #include "gc/gc_cause.h"
 #include "gc_root.h"
 #include "gc_type.h"
-#include <stdint.h>
-#include <vector>
+#include "object_callbacks.h"
 
 namespace art {
+
+namespace mirror {
+class Class;
+class Object;
+class Reference;
+}  // namespace mirror
+
 namespace gc {
 
 class Heap;
@@ -113,7 +122,7 @@
   DISALLOW_COPY_AND_ASSIGN(Iteration);
 };
 
-class GarbageCollector : public RootVisitor {
+class GarbageCollector : public RootVisitor, public IsMarkedVisitor, public MarkObjectVisitor {
  public:
   class SCOPED_LOCKABLE ScopedPause {
    public:
@@ -172,6 +181,22 @@
   void RecordFreeLOS(const ObjectBytePair& freed);
   void DumpPerformanceInfo(std::ostream& os) LOCKS_EXCLUDED(pause_histogram_lock_);
 
+  // Helper functions for querying if objects are marked. These are used for processing references,
+  // and will be used for reading system weaks while the GC is running.
+  virtual mirror::Object* IsMarked(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+  virtual bool IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+  // Used by reference processor.
+  virtual void ProcessMarkStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+  // Force mark an object.
+  virtual mirror::Object* MarkObject(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+  virtual void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+
  protected:
   // Run all of the GC phases.
   virtual void RunPhases() = 0;
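
These pure virtuals consolidate what used to be three function pointers plus a void* into the collector object itself: reference processing and system-weak sweeping now receive one GarbageCollector* and call its methods. A simplified standalone sketch of the new shape (stand-in types, not the ART classes):

#include <cassert>

struct Object {};

class GarbageCollectorLike {
 public:
  virtual ~GarbageCollectorLike() {}
  virtual Object* IsMarked(Object* obj) = 0;
  virtual Object* MarkObject(Object* obj) = 0;
  virtual void ProcessMarkStack() = 0;
};

// Before: ProcessReferences(..., IsMarkedCallback*, MarkCallback*,
//                           ProcessMarkStackCallback*, void* arg);
// After: one object carries all three behaviors.
void ProcessReferences(GarbageCollectorLike* collector, Object* referent) {
  if (collector->IsMarked(referent) == nullptr) {
    collector->MarkObject(referent);  // E.g. preserving a soft reference.
  }
  collector->ProcessMarkStack();      // Drain anything newly marked.
}

class ToyCollector : public GarbageCollectorLike {
 public:
  Object* IsMarked(Object* obj) override { return marked_ ? obj : nullptr; }
  Object* MarkObject(Object* obj) override { marked_ = true; return obj; }
  void ProcessMarkStack() override {}
 private:
  bool marked_ = false;
};

int main() {
  Object o;
  ToyCollector c;
  ProcessReferences(&c, &o);
  assert(c.IsMarked(&o) == &o);
  return 0;
}
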
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 3c247cd..0623fd4 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -21,34 +21,19 @@
 #include "base/timing_logger.h"
 #include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/accounting/mod_union_table.h"
-#include "gc/accounting/remembered_set.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
 #include "gc/reference_processor.h"
-#include "gc/space/bump_pointer_space.h"
 #include "gc/space/bump_pointer_space-inl.h"
-#include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
-#include "indirect_reference_table.h"
-#include "intern_table.h"
-#include "jni_internal.h"
-#include "mark_sweep-inl.h"
-#include "monitor.h"
 #include "mirror/class-inl.h"
-#include "mirror/class_loader.h"
-#include "mirror/dex_cache.h"
-#include "mirror/reference-inl.h"
 #include "mirror/object-inl.h"
-#include "mirror/object_array.h"
-#include "mirror/object_array-inl.h"
 #include "runtime.h"
 #include "stack.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 
-using ::art::mirror::Object;
-
 namespace art {
 namespace gc {
 namespace collector {
@@ -67,7 +52,7 @@
 
 MarkCompact::MarkCompact(Heap* heap, const std::string& name_prefix)
     : GarbageCollector(heap, name_prefix + (name_prefix.empty() ? "" : " ") + "mark compact"),
-      space_(nullptr), collector_name_(name_) {
+      space_(nullptr), collector_name_(name_), updating_references_(false) {
 }
 
 void MarkCompact::RunPhases() {
@@ -107,7 +92,7 @@
   void operator()(mirror::Object* obj) const EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_,
                                                                       Locks::heap_bitmap_lock_) {
     DCHECK_ALIGNED(obj, space::BumpPointerSpace::kAlignment);
-    DCHECK(collector_->IsMarked(obj));
+    DCHECK(collector_->IsMarked(obj) != nullptr);
     collector_->ForwardObject(obj);
   }
 
@@ -141,8 +126,7 @@
 void MarkCompact::ProcessReferences(Thread* self) {
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   heap_->GetReferenceProcessor()->ProcessReferences(
-      false, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(),
-      &HeapReferenceMarkedCallback, &MarkObjectCallback, &ProcessMarkStackCallback, this);
+      false, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(), this);
 }
 
 class BitmapSetSlowPathVisitor {
@@ -156,29 +140,29 @@
   }
 };
 
-inline void MarkCompact::MarkObject(mirror::Object* obj) {
+inline mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) {
   if (obj == nullptr) {
-    return;
+    return nullptr;
   }
   if (kUseBakerOrBrooksReadBarrier) {
     // Verify all the objects have the correct forward pointer installed.
     obj->AssertReadBarrierPointer();
   }
-  if (immune_region_.ContainsObject(obj)) {
-    return;
-  }
-  if (objects_before_forwarding_->HasAddress(obj)) {
-    if (!objects_before_forwarding_->Set(obj)) {
-      MarkStackPush(obj);  // This object was not previously marked.
-    }
-  } else {
-    DCHECK(!space_->HasAddress(obj));
-    BitmapSetSlowPathVisitor visitor;
-    if (!mark_bitmap_->Set(obj, visitor)) {
-      // This object was not previously marked.
-      MarkStackPush(obj);
+  if (!immune_region_.ContainsObject(obj)) {
+    if (objects_before_forwarding_->HasAddress(obj)) {
+      if (!objects_before_forwarding_->Set(obj)) {
+        MarkStackPush(obj);  // This object was not previously marked.
+      }
+    } else {
+      DCHECK(!space_->HasAddress(obj));
+      BitmapSetSlowPathVisitor visitor;
+      if (!mark_bitmap_->Set(obj, visitor)) {
+        // This object was not previously marked.
+        MarkStackPush(obj);
+      }
     }
   }
+  return obj;
 }
 
 void MarkCompact::MarkingPhase() {
@@ -240,7 +224,7 @@
         TimingLogger::ScopedTiming t2(
             space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
                                      "UpdateAndMarkImageModUnionTable", GetTimings());
-        table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this);
+        table->UpdateAndMarkReferences(this);
       }
     }
   }
@@ -272,7 +256,7 @@
 }
 
 void MarkCompact::ResizeMarkStack(size_t new_size) {
-  std::vector<StackReference<Object>> temp(mark_stack_->Begin(), mark_stack_->End());
+  std::vector<StackReference<mirror::Object>> temp(mark_stack_->Begin(), mark_stack_->End());
   CHECK_LE(mark_stack_->Size(), new_size);
   mark_stack_->Resize(new_size);
   for (auto& obj : temp) {
@@ -280,7 +264,7 @@
   }
 }
 
-inline void MarkCompact::MarkStackPush(Object* obj) {
+inline void MarkCompact::MarkStackPush(mirror::Object* obj) {
   if (UNLIKELY(mark_stack_->Size() >= mark_stack_->Capacity())) {
     ResizeMarkStack(mark_stack_->Capacity() * 2);
   }
@@ -288,23 +272,12 @@
   mark_stack_->PushBack(obj);
 }
 
-void MarkCompact::ProcessMarkStackCallback(void* arg) {
-  reinterpret_cast<MarkCompact*>(arg)->ProcessMarkStack();
-}
-
-mirror::Object* MarkCompact::MarkObjectCallback(mirror::Object* root, void* arg) {
-  reinterpret_cast<MarkCompact*>(arg)->MarkObject(root);
-  return root;
-}
-
-void MarkCompact::MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* obj_ptr,
-                                            void* arg) {
-  reinterpret_cast<MarkCompact*>(arg)->MarkObject(obj_ptr->AsMirrorPtr());
-}
-
-void MarkCompact::DelayReferenceReferentCallback(mirror::Class* klass, mirror::Reference* ref,
-                                                 void* arg) {
-  reinterpret_cast<MarkCompact*>(arg)->DelayReferenceReferent(klass, ref);
+void MarkCompact::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj_ptr) {
+  if (updating_references_) {
+    UpdateHeapReference(obj_ptr);
+  } else {
+    MarkObject(obj_ptr->AsMirrorPtr());
+  }
 }
 
 void MarkCompact::VisitRoots(
@@ -373,6 +346,7 @@
 
 void MarkCompact::UpdateReferences() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
+  updating_references_ = true;
   Runtime* runtime = Runtime::Current();
   // Update roots.
   UpdateRootVisitor update_root_visitor(this);
@@ -387,7 +361,7 @@
           space->IsZygoteSpace() ? "UpdateZygoteModUnionTableReferences" :
                                    "UpdateImageModUnionTableReferences",
                                    GetTimings());
-      table->UpdateAndMarkReferences(&UpdateHeapReferenceCallback, this);
+      table->UpdateAndMarkReferences(this);
     } else {
       // No mod union table, so we need to scan the space using bitmap visit.
       // Scan the space using bitmap visit.
@@ -403,14 +377,15 @@
   CHECK(!kMovingClasses)
       << "Didn't update large object classes since they are assumed to not move.";
   // Update the system weaks, these should already have been swept.
-  runtime->SweepSystemWeaks(&MarkedForwardingAddressCallback, this);
+  runtime->SweepSystemWeaks(this);
   // Update the objects in the bump pointer space last, these objects don't have a bitmap.
   UpdateObjectReferencesVisitor visitor(this);
   objects_before_forwarding_->VisitMarkedRange(reinterpret_cast<uintptr_t>(space_->Begin()),
                                                reinterpret_cast<uintptr_t>(space_->End()),
                                                visitor);
   // Update the reference processor cleared list.
-  heap_->GetReferenceProcessor()->UpdateRoots(&MarkedForwardingAddressCallback, this);
+  heap_->GetReferenceProcessor()->UpdateRoots(this);
+  updating_references_ = false;
 }
 
 void MarkCompact::Compact() {
@@ -436,10 +411,6 @@
   Runtime::Current()->VisitRoots(this);
 }
 
-mirror::Object* MarkCompact::MarkedForwardingAddressCallback(mirror::Object* obj, void* arg) {
-  return reinterpret_cast<MarkCompact*>(arg)->GetMarkedForwardAddress(obj);
-}
-
 inline void MarkCompact::UpdateHeapReference(mirror::HeapReference<mirror::Object>* reference) {
   mirror::Object* obj = reference->AsMirrorPtr();
   if (obj != nullptr) {
@@ -451,17 +422,12 @@
   }
 }
 
-void MarkCompact::UpdateHeapReferenceCallback(mirror::HeapReference<mirror::Object>* reference,
-                                              void* arg) {
-  reinterpret_cast<MarkCompact*>(arg)->UpdateHeapReference(reference);
-}
-
 class UpdateReferenceVisitor {
  public:
   explicit UpdateReferenceVisitor(MarkCompact* collector) : collector_(collector) {
   }
 
-  void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const
+  void operator()(mirror::Object* obj, MemberOffset offset, bool /*is_static*/) const
       ALWAYS_INLINE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     collector_->UpdateHeapReference(obj->GetFieldObjectReferenceAddr<kVerifyNone>(offset));
   }
@@ -481,7 +447,7 @@
   obj->VisitReferences<kMovingClasses>(visitor, visitor);
 }
 
-inline mirror::Object* MarkCompact::GetMarkedForwardAddress(mirror::Object* obj) const {
+inline mirror::Object* MarkCompact::GetMarkedForwardAddress(mirror::Object* obj) {
   DCHECK(obj != nullptr);
   if (objects_before_forwarding_->HasAddress(obj)) {
     DCHECK(objects_before_forwarding_->Test(obj));
@@ -491,33 +457,30 @@
     return ret;
   }
   DCHECK(!space_->HasAddress(obj));
-  DCHECK(IsMarked(obj));
   return obj;
 }
 
-inline bool MarkCompact::IsMarked(const Object* object) const {
+mirror::Object* MarkCompact::IsMarked(mirror::Object* object) {
   if (immune_region_.ContainsObject(object)) {
-    return true;
+    return object;
+  }
+  if (updating_references_) {
+    return GetMarkedForwardAddress(object);
   }
   if (objects_before_forwarding_->HasAddress(object)) {
-    return objects_before_forwarding_->Test(object);
+    return objects_before_forwarding_->Test(object) ? object : nullptr;
   }
-  return mark_bitmap_->Test(object);
+  return mark_bitmap_->Test(object) ? object : nullptr;
 }
 
-mirror::Object* MarkCompact::IsMarkedCallback(mirror::Object* object, void* arg) {
-  return reinterpret_cast<MarkCompact*>(arg)->IsMarked(object) ? object : nullptr;
-}
-
-bool MarkCompact::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref_ptr,
-                                              void* arg) {
+bool MarkCompact::IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* ref_ptr) {
   // Side effect free since we call this before ever moving objects.
-  return reinterpret_cast<MarkCompact*>(arg)->IsMarked(ref_ptr->AsMirrorPtr());
+  return IsMarked(ref_ptr->AsMirrorPtr()) != nullptr;
 }
 
 void MarkCompact::SweepSystemWeaks() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
-  Runtime::Current()->SweepSystemWeaks(IsMarkedCallback, this);
+  Runtime::Current()->SweepSystemWeaks(this);
 }
 
 bool MarkCompact::ShouldSweepSpace(space::ContinuousSpace* space) const {
@@ -592,8 +555,7 @@
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
 void MarkCompact::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
-  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference,
-                                                         &HeapReferenceMarkedCallback, this);
+  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference, this);
 }
 
 class MarkCompactMarkObjectVisitor {
@@ -601,7 +563,7 @@
   explicit MarkCompactMarkObjectVisitor(MarkCompact* collector) : collector_(collector) {
   }
 
-  void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const ALWAYS_INLINE
+  void operator()(mirror::Object* obj, MemberOffset offset, bool /*is_static*/) const ALWAYS_INLINE
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     // Object was already verified when we scanned it.
     collector_->MarkObject(obj->GetFieldObject<mirror::Object, kVerifyNone>(offset));
@@ -618,7 +580,7 @@
 };
 
 // Visit all of the references of an object and update.
-void MarkCompact::ScanObject(Object* obj) {
+void MarkCompact::ScanObject(mirror::Object* obj) {
   MarkCompactMarkObjectVisitor visitor(this);
   obj->VisitReferences<kMovingClasses>(visitor, visitor);
 }
@@ -627,7 +589,7 @@
 void MarkCompact::ProcessMarkStack() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   while (!mark_stack_->IsEmpty()) {
-    Object* obj = mark_stack_->PopBack();
+    mirror::Object* obj = mark_stack_->PopBack();
     DCHECK(obj != nullptr);
     ScanObject(obj);
   }
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index f59a2cd..89d66b5 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -121,23 +121,6 @@
                           const RootInfo& info)
       OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
-  static mirror::Object* MarkObjectCallback(mirror::Object* root, void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  static void MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* obj_ptr, void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  static bool HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref_ptr,
-                                          void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  static void ProcessMarkStackCallback(void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
-
-  static void DelayReferenceReferentCallback(mirror::Class* klass, mirror::Reference* ref,
-                                             void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
   // Schedules an unmarked object for reference processing.
   void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -145,11 +128,7 @@
  protected:
   // Returns null if the object is not marked, otherwise returns the forwarding address (same as
  // object for non-movable things).
-  mirror::Object* GetMarkedForwardAddress(mirror::Object* object) const
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  static mirror::Object* MarkedForwardingAddressCallback(mirror::Object* object, void* arg)
+  mirror::Object* GetMarkedForwardAddress(mirror::Object* object)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -184,30 +163,27 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
   // Update the references of objects by using the forwarding addresses.
   void UpdateReferences() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
-  static void UpdateRootCallback(mirror::Object** root, void* arg, const RootInfo& /*root_info*/)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
   // Move objects and restore lock words.
   void MoveObjects() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Move a single object to its forward address.
   void MoveObject(mirror::Object* obj, size_t len) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Mark a single object.
-  void MarkObject(mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
-                                                                Locks::mutator_lock_);
-  bool IsMarked(const mirror::Object* obj) const
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-  static mirror::Object* IsMarkedCallback(mirror::Object* object, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  virtual mirror::Object* MarkObject(mirror::Object* obj) OVERRIDE
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj_ptr) OVERRIDE
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+  virtual mirror::Object* IsMarked(mirror::Object* obj) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual bool IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ForwardObject(mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
                                                                    Locks::mutator_lock_);
   // Update a single heap reference.
   void UpdateHeapReference(mirror::HeapReference<mirror::Object>* reference)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void UpdateHeapReferenceCallback(mirror::HeapReference<mirror::Object>* reference,
-                                          void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Update all of the references of a single object.
   void UpdateObjectReferences(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
@@ -242,6 +218,9 @@
   // Which lock words we need to restore as we are moving objects.
   std::deque<LockWord> lock_words_to_restore_;
 
+  // Whether or not we are currently updating references.
+  bool updating_references_;
+
  private:
   friend class BitmapSetSlowPathVisitor;
   friend class CalculateObjectForwardingAddressVisitor;
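
The OVERRIDE annotations added to mark_compact.h imply virtual hooks on a common base; those declarations are outside this diff (presumably in garbage_collector.h and the object callback headers), so the following is only a hedged sketch of the assumed shape:

namespace sketch {

struct Object {};
template <typename T> struct HeapReference { T* ptr; };

class MarkObjectVisitor {
 public:
  virtual ~MarkObjectVisitor() {}
  virtual Object* MarkObject(Object* obj) = 0;
  virtual void MarkHeapReference(HeapReference<Object>* ref) = 0;
};

class IsMarkedVisitor {
 public:
  virtual ~IsMarkedVisitor() {}
  virtual Object* IsMarked(Object* obj) = 0;
};

// A collector implements both, plus the mark-stack draining hook, so a single
// `this` pointer can stand in for every (callback, void* arg) pair removed above.
class GarbageCollector : public MarkObjectVisitor, public IsMarkedVisitor {
 public:
  virtual bool IsMarkedHeapReference(HeapReference<Object>* ref) = 0;
  virtual void ProcessMarkStack() = 0;
};

}  // namespace sketch

With this shape, every former callback-plus-void*-arg pair collapses into passing the collector itself, which is exactly the substitution each hunk in this change performs.
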
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 1c9c412..abb1d3d 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -37,7 +37,6 @@
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
 #include "gc/reference_processor.h"
-#include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
 #include "mark_sweep-inl.h"
@@ -47,8 +46,6 @@
 #include "thread-inl.h"
 #include "thread_list.h"
 
-using ::art::mirror::Object;
-
 namespace art {
 namespace gc {
 namespace collector {
@@ -175,8 +172,7 @@
 void MarkSweep::ProcessReferences(Thread* self) {
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   GetHeap()->GetReferenceProcessor()->ProcessReferences(
-      true, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(),
-      &HeapReferenceMarkedCallback, &MarkObjectCallback, &ProcessMarkStackCallback, this);
+      true, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(), this);
 }
 
 void MarkSweep::PausePhase() {
@@ -273,7 +269,7 @@
       TimingLogger::ScopedTiming t(name, GetTimings());
       accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
       CHECK(mod_union_table != nullptr);
-      mod_union_table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this);
+      mod_union_table->UpdateAndMarkReferences(this);
     }
   }
 }
@@ -333,7 +329,7 @@
     // Someone else acquired the lock and expanded the mark stack before us.
     return;
   }
-  std::vector<StackReference<Object>> temp(mark_stack_->Begin(), mark_stack_->End());
+  std::vector<StackReference<mirror::Object>> temp(mark_stack_->Begin(), mark_stack_->End());
   CHECK_LE(mark_stack_->Size(), new_size);
   mark_stack_->Resize(new_size);
   for (auto& obj : temp) {
@@ -341,7 +337,12 @@
   }
 }
 
-inline void MarkSweep::MarkObjectNonNullParallel(Object* obj) {
+mirror::Object* MarkSweep::MarkObject(mirror::Object* obj) {
+  MarkObject(obj, nullptr, MemberOffset(0));
+  return obj;
+}
+
+inline void MarkSweep::MarkObjectNonNullParallel(mirror::Object* obj) {
   DCHECK(obj != nullptr);
   if (MarkObjectParallel(obj)) {
     MutexLock mu(Thread::Current(), mark_stack_lock_);
@@ -353,28 +354,18 @@
   }
 }
 
-mirror::Object* MarkSweep::MarkObjectCallback(mirror::Object* obj, void* arg) {
-  MarkSweep* mark_sweep = reinterpret_cast<MarkSweep*>(arg);
-  mark_sweep->MarkObject(obj);
-  return obj;
-}
-
-void MarkSweep::MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* ref, void* arg) {
-  reinterpret_cast<MarkSweep*>(arg)->MarkObject(ref->AsMirrorPtr());
-}
-
-bool MarkSweep::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref, void* arg) {
-  return reinterpret_cast<MarkSweep*>(arg)->IsMarked(ref->AsMirrorPtr());
+bool MarkSweep::IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* ref) {
+  return IsMarked(ref->AsMirrorPtr());
 }
 
 class MarkSweepMarkObjectSlowPath {
  public:
-  explicit MarkSweepMarkObjectSlowPath(MarkSweep* mark_sweep, Object* holder = nullptr,
+  explicit MarkSweepMarkObjectSlowPath(MarkSweep* mark_sweep, mirror::Object* holder = nullptr,
                                        MemberOffset offset = MemberOffset(0))
       : mark_sweep_(mark_sweep), holder_(holder), offset_(offset) {
   }
 
-  void operator()(const Object* obj) const ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS {
+  void operator()(const mirror::Object* obj) const ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS {
     if (kProfileLargeObjects) {
       // TODO: Differentiate between marking and testing somehow.
       ++mark_sweep_->large_object_test_;
@@ -450,7 +441,8 @@
   MemberOffset offset_;
 };
 
-inline void MarkSweep::MarkObjectNonNull(Object* obj, Object* holder, MemberOffset offset) {
+inline void MarkSweep::MarkObjectNonNull(mirror::Object* obj, mirror::Object* holder,
+                                         MemberOffset offset) {
   DCHECK(obj != nullptr);
   if (kUseBakerOrBrooksReadBarrier) {
     // Verify all the objects have the correct pointer installed.
@@ -481,7 +473,7 @@
   }
 }
 
-inline void MarkSweep::PushOnMarkStack(Object* obj) {
+inline void MarkSweep::PushOnMarkStack(mirror::Object* obj) {
   if (UNLIKELY(mark_stack_->Size() >= mark_stack_->Capacity())) {
    // Lock is not needed but is here anyway to please annotalysis.
     MutexLock mu(Thread::Current(), mark_stack_lock_);
@@ -491,14 +483,14 @@
   mark_stack_->PushBack(obj);
 }
 
-inline bool MarkSweep::MarkObjectParallel(const Object* obj) {
+inline bool MarkSweep::MarkObjectParallel(mirror::Object* obj) {
   DCHECK(obj != nullptr);
   if (kUseBakerOrBrooksReadBarrier) {
     // Verify all the objects have the correct pointer installed.
     obj->AssertReadBarrierPointer();
   }
   if (immune_region_.ContainsObject(obj)) {
-    DCHECK(IsMarked(obj));
+    DCHECK(IsMarked(obj) != nullptr);
     return false;
   }
   // Try to take advantage of locality of references within a space, failing this find the space
@@ -511,8 +503,13 @@
   return !mark_bitmap_->AtomicTestAndSet(obj, visitor);
 }
 
+void MarkSweep::MarkHeapReference(mirror::HeapReference<mirror::Object>* ref) {
+  MarkObject(ref->AsMirrorPtr(), nullptr, MemberOffset(0));
+}
+
 // Used to mark objects when processing the mark stack. If an object is null, it is not marked.
-inline void MarkSweep::MarkObject(Object* obj, Object* holder, MemberOffset offset) {
+inline void MarkSweep::MarkObject(mirror::Object* obj, mirror::Object* holder,
+                                  MemberOffset offset) {
   if (obj != nullptr) {
     MarkObjectNonNull(obj, holder, offset);
   } else if (kCountMarkedObjects) {
@@ -526,7 +523,7 @@
 
   void VisitRoot(mirror::Object* root, const RootInfo& info) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
-    CHECK(collector_->IsMarked(root)) << info.ToString();
+    CHECK(collector_->IsMarked(root) != nullptr) << info.ToString();
   }
 
  private:
@@ -599,7 +596,8 @@
   explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE
       : mark_sweep_(mark_sweep) {}
 
-  void operator()(Object* obj) const ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+  void operator()(mirror::Object* obj) const ALWAYS_INLINE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     if (kCheckLocks) {
       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
@@ -631,7 +629,7 @@
 class MarkStackTask : public Task {
  public:
   MarkStackTask(ThreadPool* thread_pool, MarkSweep* mark_sweep, size_t mark_stack_size,
-                StackReference<Object>* mark_stack)
+                StackReference<mirror::Object>* mark_stack)
       : mark_sweep_(mark_sweep),
         thread_pool_(thread_pool),
         mark_stack_pos_(mark_stack_size) {
@@ -655,7 +653,7 @@
                                        MarkSweep* mark_sweep) ALWAYS_INLINE
             : chunk_task_(chunk_task), mark_sweep_(mark_sweep) {}
 
-    void operator()(Object* obj, MemberOffset offset, bool /* static */) const ALWAYS_INLINE
+    void operator()(mirror::Object* obj, MemberOffset offset, bool /* static */) const ALWAYS_INLINE
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       mirror::Object* ref = obj->GetFieldObject<mirror::Object>(offset);
       if (ref != nullptr && mark_sweep_->MarkObjectParallel(ref)) {
@@ -681,7 +679,7 @@
         : chunk_task_(chunk_task) {}
 
     // No thread safety analysis since multiple threads will use this visitor.
-    void operator()(Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+    void operator()(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
       MarkSweep* const mark_sweep = chunk_task_->mark_sweep_;
       MarkObjectParallelVisitor mark_visitor(chunk_task_, mark_sweep);
@@ -704,11 +702,12 @@
   MarkSweep* const mark_sweep_;
   ThreadPool* const thread_pool_;
   // Thread local mark stack for this task.
-  StackReference<Object> mark_stack_[kMaxSize];
+  StackReference<mirror::Object> mark_stack_[kMaxSize];
   // Mark stack position.
   size_t mark_stack_pos_;
 
-  ALWAYS_INLINE void MarkStackPush(Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ALWAYS_INLINE void MarkStackPush(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(mark_stack_pos_ == kMaxSize)) {
       // Mark stack overflow, give 1/2 the stack to the thread pool as a new work task.
       mark_stack_pos_ /= 2;
@@ -732,12 +731,12 @@
     ScanObjectParallelVisitor visitor(this);
     // TODO: Tune this.
     static const size_t kFifoSize = 4;
-    BoundedFifoPowerOfTwo<Object*, kFifoSize> prefetch_fifo;
+    BoundedFifoPowerOfTwo<mirror::Object*, kFifoSize> prefetch_fifo;
     for (;;) {
-      Object* obj = nullptr;
+      mirror::Object* obj = nullptr;
       if (kUseMarkStackPrefetch) {
         while (mark_stack_pos_ != 0 && prefetch_fifo.size() < kFifoSize) {
-          Object* const mark_stack_obj = mark_stack_[--mark_stack_pos_].AsMirrorPtr();
+          mirror::Object* const mark_stack_obj = mark_stack_[--mark_stack_pos_].AsMirrorPtr();
           DCHECK(mark_stack_obj != nullptr);
           __builtin_prefetch(mark_stack_obj);
           prefetch_fifo.push_back(mark_stack_obj);
@@ -764,7 +763,7 @@
   CardScanTask(ThreadPool* thread_pool, MarkSweep* mark_sweep,
                accounting::ContinuousSpaceBitmap* bitmap,
                uint8_t* begin, uint8_t* end, uint8_t minimum_age, size_t mark_stack_size,
-               StackReference<Object>* mark_stack_obj, bool clear_card)
+               StackReference<mirror::Object>* mark_stack_obj, bool clear_card)
       : MarkStackTask<false>(thread_pool, mark_sweep, mark_stack_size, mark_stack_obj),
         bitmap_(bitmap),
         begin_(begin),
@@ -815,8 +814,8 @@
     TimingLogger::ScopedTiming t(paused ? "(Paused)ScanGrayObjects" : __FUNCTION__,
         GetTimings());
     // Try to take some of the mark stack since we can pass this off to the worker tasks.
-    StackReference<Object>* mark_stack_begin = mark_stack_->Begin();
-    StackReference<Object>* mark_stack_end = mark_stack_->End();
+    StackReference<mirror::Object>* mark_stack_begin = mark_stack_->Begin();
+    StackReference<mirror::Object>* mark_stack_end = mark_stack_->End();
     const size_t mark_stack_size = mark_stack_end - mark_stack_begin;
     // Estimated number of work tasks we will create.
     const size_t mark_stack_tasks = GetHeap()->GetContinuousSpaces().size() * thread_count;
@@ -832,8 +831,8 @@
       // Align up the end address. For example, the image space's end
       // may not be card-size-aligned.
       card_end = AlignUp(card_end, accounting::CardTable::kCardSize);
-      DCHECK(IsAligned<accounting::CardTable::kCardSize>(card_begin));
-      DCHECK(IsAligned<accounting::CardTable::kCardSize>(card_end));
+      DCHECK_ALIGNED(card_begin, accounting::CardTable::kCardSize);
+      DCHECK_ALIGNED(card_end, accounting::CardTable::kCardSize);
      // Calculate how many bytes of heap we will scan.
       const size_t address_range = card_end - card_begin;
       // Calculate how much address range each task gets.
@@ -988,13 +987,6 @@
   ProcessMarkStack(false);
 }
 
-mirror::Object* MarkSweep::IsMarkedCallback(mirror::Object* object, void* arg) {
-  if (reinterpret_cast<MarkSweep*>(arg)->IsMarked(object)) {
-    return object;
-  }
-  return nullptr;
-}
-
 void MarkSweep::RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age) {
   ScanGrayObjects(paused, minimum_age);
   ProcessMarkStack(paused);
@@ -1014,17 +1006,24 @@
 
 void MarkSweep::SweepSystemWeaks(Thread* self) {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
-  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  Runtime::Current()->SweepSystemWeaks(IsMarkedCallback, this);
+  ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  Runtime::Current()->SweepSystemWeaks(this);
 }
 
-mirror::Object* MarkSweep::VerifySystemWeakIsLiveCallback(Object* obj, void* arg) {
-  reinterpret_cast<MarkSweep*>(arg)->VerifyIsLive(obj);
-  // We don't actually want to sweep the object, so lets return "marked"
-  return obj;
-}
+class VerifySystemWeakVisitor : public IsMarkedVisitor {
+ public:
+  explicit VerifySystemWeakVisitor(MarkSweep* mark_sweep) : mark_sweep_(mark_sweep) {}
 
-void MarkSweep::VerifyIsLive(const Object* obj) {
+  virtual mirror::Object* IsMarked(mirror::Object* obj) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    mark_sweep_->VerifyIsLive(obj);
+    return obj;
+  }
+
+  MarkSweep* const mark_sweep_;
+};
+
+void MarkSweep::VerifyIsLive(const mirror::Object* obj) {
   if (!heap_->GetLiveBitmap()->Test(obj)) {
     // TODO: Consider live stack? Has this code bitrotted?
     CHECK(!heap_->allocation_stack_->Contains(obj))
@@ -1035,7 +1034,8 @@
 void MarkSweep::VerifySystemWeaks() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
  // Verify system weaks using a special object visitor which returns the input object.
-  Runtime::Current()->SweepSystemWeaks(VerifySystemWeakIsLiveCallback, this);
+  VerifySystemWeakVisitor visitor(this);
+  Runtime::Current()->SweepSystemWeaks(&visitor);
 }
 
 class CheckpointMarkThreadRoots : public Closure, public RootVisitor {
@@ -1122,7 +1122,7 @@
   ObjectBytePair freed;
   ObjectBytePair freed_los;
   // How many objects are left in the array, modified after each space is swept.
-  StackReference<Object>* objects = allocations->Begin();
+  StackReference<mirror::Object>* objects = allocations->Begin();
   size_t count = allocations->Size();
  // Change the order to ensure that the non-moving space is swept last as an optimization.
   std::vector<space::ContinuousSpace*> sweep_spaces;
@@ -1150,9 +1150,9 @@
     if (swap_bitmaps) {
       std::swap(live_bitmap, mark_bitmap);
     }
-    StackReference<Object>* out = objects;
+    StackReference<mirror::Object>* out = objects;
     for (size_t i = 0; i < count; ++i) {
-      Object* const obj = objects[i].AsMirrorPtr();
+      mirror::Object* const obj = objects[i].AsMirrorPtr();
       if (kUseThreadLocalAllocationStack && obj == nullptr) {
         continue;
       }
@@ -1191,7 +1191,7 @@
       std::swap(large_live_objects, large_mark_objects);
     }
     for (size_t i = 0; i < count; ++i) {
-      Object* const obj = objects[i].AsMirrorPtr();
+      mirror::Object* const obj = objects[i].AsMirrorPtr();
       // Handle large objects.
       if (kUseThreadLocalAllocationStack && obj == nullptr) {
         continue;
@@ -1250,16 +1250,15 @@
   if (kCountJavaLangRefs) {
     ++reference_count_;
   }
-  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, &HeapReferenceMarkedCallback,
-                                                         this);
+  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, this);
 }
 
-class MarkObjectVisitor {
+class MarkVisitor {
  public:
-  explicit MarkObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE : mark_sweep_(mark_sweep) {
+  explicit MarkVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE : mark_sweep_(mark_sweep) {
   }
 
-  void operator()(Object* obj, MemberOffset offset, bool /* is_static */) const
+  void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */) const
       ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     if (kCheckLocks) {
@@ -1275,16 +1274,12 @@
 
 // Scans an object reference.  Determines the type of the reference
 // and dispatches to a specialized scanning routine.
-void MarkSweep::ScanObject(Object* obj) {
-  MarkObjectVisitor mark_visitor(this);
+void MarkSweep::ScanObject(mirror::Object* obj) {
+  MarkVisitor mark_visitor(this);
   DelayReferenceReferentVisitor ref_visitor(this);
   ScanObjectVisit(obj, mark_visitor, ref_visitor);
 }
 
-void MarkSweep::ProcessMarkStackCallback(void* arg) {
-  reinterpret_cast<MarkSweep*>(arg)->ProcessMarkStack(false);
-}
-
 void MarkSweep::ProcessMarkStackParallel(size_t thread_count) {
   Thread* self = Thread::Current();
   ThreadPool* thread_pool = GetHeap()->GetThreadPool();
@@ -1317,12 +1312,12 @@
   } else {
     // TODO: Tune this.
     static const size_t kFifoSize = 4;
-    BoundedFifoPowerOfTwo<Object*, kFifoSize> prefetch_fifo;
+    BoundedFifoPowerOfTwo<mirror::Object*, kFifoSize> prefetch_fifo;
     for (;;) {
-      Object* obj = nullptr;
+      mirror::Object* obj = nullptr;
       if (kUseMarkStackPrefetch) {
         while (!mark_stack_->IsEmpty() && prefetch_fifo.size() < kFifoSize) {
-          Object* mark_stack_obj = mark_stack_->PopBack();
+          mirror::Object* mark_stack_obj = mark_stack_->PopBack();
           DCHECK(mark_stack_obj != nullptr);
           __builtin_prefetch(mark_stack_obj);
           prefetch_fifo.push_back(mark_stack_obj);
@@ -1344,14 +1339,14 @@
   }
 }
 
-inline bool MarkSweep::IsMarked(const Object* object) const {
+inline mirror::Object* MarkSweep::IsMarked(mirror::Object* object) {
   if (immune_region_.ContainsObject(object)) {
-    return true;
+    return object;
   }
   if (current_space_bitmap_->HasAddress(object)) {
-    return current_space_bitmap_->Test(object);
+    return current_space_bitmap_->Test(object) ? object : nullptr;
   }
-  return mark_bitmap_->Test(object);
+  return mark_bitmap_->Test(object) ? object : nullptr;
 }
 
 void MarkSweep::FinishPhase() {
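
Two details in the mark_sweep.cc hunks are worth illustrating. SweepSystemWeaks now passes the collector itself, and the heap bitmap lock drops from writer to reader, presumably because marking is complete at that point and IsMarked() only reads the bitmaps. A toy consumer of the visitor (hypothetical weak table; the real runtime sweeps interned strings, monitors, and JNI weak globals):

#include <vector>

struct Object {};

class IsMarkedVisitor {
 public:
  virtual ~IsMarkedVisitor() {}
  virtual Object* IsMarked(Object* obj) = 0;
};

// Sweep a weak table: clear dead entries, rewrite forwarded ones. Only reads
// mark state, so a shared (reader) bitmap lock is sufficient in the caller.
void SweepWeakTable(std::vector<Object*>* table, IsMarkedVisitor* visitor) {
  for (Object*& entry : *table) {
    if (entry != nullptr) {
      entry = visitor->IsMarked(entry);  // nullptr clears, new address updates.
    }
  }
}
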
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index d29d87a..7692b06 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -170,18 +170,9 @@
 
   // Verify that an object is live, either in a live bitmap or in the allocation stack.
   void VerifyIsLive(const mirror::Object* obj)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
-  static mirror::Object* MarkObjectCallback(mirror::Object* obj, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  static void MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* ref, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  static bool HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref, void* arg)
+  virtual bool IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* ref) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -194,13 +185,14 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void ProcessMarkStackCallback(void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Marks an object.
-  void MarkObject(mirror::Object* obj, mirror::Object* holder = nullptr,
-                  MemberOffset offset = MemberOffset(0))
+  virtual mirror::Object* MarkObject(mirror::Object* obj) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void MarkObject(mirror::Object* obj, mirror::Object* holder, MemberOffset offset)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* ref) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -213,16 +205,10 @@
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
  protected:
-  // Returns true if the object has its bit set in the mark bitmap.
-  bool IsMarked(const mirror::Object* object) const
+  // Returns object if the object is marked in the heap bitmap, otherwise null.
+  virtual mirror::Object* IsMarked(mirror::Object* object) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static mirror::Object* IsMarkedCallback(mirror::Object* object, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  static void VerifyImageRootVisitor(mirror::Object* root, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
   void MarkObjectNonNull(mirror::Object* obj, mirror::Object* holder = nullptr,
                          MemberOffset offset = MemberOffset(0))
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -233,7 +219,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns true if we need to add obj to a mark stack.
-  bool MarkObjectParallel(const mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
+  bool MarkObjectParallel(mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
 
   // Verify the roots of the heap and print out information related to any invalid roots.
  // Called in MarkObject, so we may not hold the mutator lock.
@@ -258,6 +244,11 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  virtual void ProcessMarkStack() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    ProcessMarkStack(false);
+  }
+
   // Recursively blackens objects on the mark stack.
   void ProcessMarkStack(bool paused)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
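
The new parameterless ProcessMarkStack() in mark_sweep.h is a small adapter: the reference processor only knows the virtual, argument-free hook, while MarkSweep keeps its internal paused/unpaused variant. The pattern, with toy classes:

class Collector {
 public:
  virtual ~Collector() {}
  virtual void ProcessMarkStack() = 0;
};

class ToyMarkSweep : public Collector {
 public:
  void ProcessMarkStack() override {
    ProcessMarkStack(/*paused=*/false);  // Reference processing runs concurrently.
  }

 private:
  void ProcessMarkStack(bool paused) {
    (void)paused;  // Would select serial vs. parallel stack draining.
  }
};
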
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
index 7b19dc9..a7de44f 100644
--- a/runtime/gc/collector/semi_space-inl.h
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -34,7 +34,7 @@
   void operator()(const mirror::Object* obj) const {
     CHECK(!semi_space_->to_space_->HasAddress(obj)) << "Marking " << obj << " in to_space_";
    // Marking a large object, make sure it's aligned as a sanity check.
-    CHECK(IsAligned<kPageSize>(obj));
+    CHECK_ALIGNED(obj, kPageSize);
   }
 
  private:
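
CHECK_ALIGNED(obj, kPageSize) replaces CHECK(IsAligned<kPageSize>(obj)) here and in heap.cc below; the macro form can also print the offending pointer on failure. A rough reconstruction of the underlying predicate (hedged; the real definitions live in ART's base/macros.h):

#include <cstdint>

// IsAligned<n>(ptr): true iff ptr is an n-byte-aligned address, n a power of two.
template <int kAlignment, typename T>
inline bool IsAligned(T* ptr) {
  static_assert((kAlignment & (kAlignment - 1)) == 0,
                "alignment must be a power of two");
  return (reinterpret_cast<uintptr_t>(ptr) & (kAlignment - 1)) == 0;
}

// CHECK_ALIGNED(value, alignment) then amounts to
// CHECK(IsAligned<alignment>(value)) plus printing `value` in the abort message.
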
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 82d02e7..2a9f47a 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -157,8 +157,7 @@
 void SemiSpace::ProcessReferences(Thread* self) {
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   GetHeap()->GetReferenceProcessor()->ProcessReferences(
-      false, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(),
-      &HeapReferenceMarkedCallback, &MarkObjectCallback, &ProcessMarkStackCallback, this);
+      false, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(), this);
 }
 
 void SemiSpace::MarkingPhase() {
@@ -336,7 +335,7 @@
           space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
                                    "UpdateAndMarkImageModUnionTable",
                                    GetTimings());
-      table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this);
+      table->UpdateAndMarkReferences(this);
       DCHECK(GetHeap()->FindRememberedSetFromSpace(space) == nullptr);
     } else if (collect_from_space_only_ && space->GetLiveBitmap() != nullptr) {
       // If the space has no mod union table (the non-moving space and main spaces when the bump
@@ -351,8 +350,7 @@
       CHECK_EQ(rem_set != nullptr, kUseRememberedSet);
       if (rem_set != nullptr) {
         TimingLogger::ScopedTiming t2("UpdateAndMarkRememberedSet", GetTimings());
-        rem_set->UpdateAndMarkReferences(MarkHeapReferenceCallback, DelayReferenceReferentCallback,
-                                         from_space_, this);
+        rem_set->UpdateAndMarkReferences(from_space_, this);
         if (kIsDebugBuild) {
           // Verify that there are no from-space references that
           // remain in the space, that is, the remembered set (and the
@@ -583,24 +581,14 @@
   return forward_address;
 }
 
-void SemiSpace::ProcessMarkStackCallback(void* arg) {
-  reinterpret_cast<SemiSpace*>(arg)->ProcessMarkStack();
-}
-
-mirror::Object* SemiSpace::MarkObjectCallback(mirror::Object* root, void* arg) {
+mirror::Object* SemiSpace::MarkObject(mirror::Object* root) {
   auto ref = StackReference<mirror::Object>::FromMirrorPtr(root);
-  reinterpret_cast<SemiSpace*>(arg)->MarkObject(&ref);
+  MarkObject(&ref);
   return ref.AsMirrorPtr();
 }
 
-void SemiSpace::MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* obj_ptr,
-                                          void* arg) {
-  reinterpret_cast<SemiSpace*>(arg)->MarkObject(obj_ptr);
-}
-
-void SemiSpace::DelayReferenceReferentCallback(mirror::Class* klass, mirror::Reference* ref,
-                                               void* arg) {
-  reinterpret_cast<SemiSpace*>(arg)->DelayReferenceReferent(klass, ref);
+void SemiSpace::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj_ptr) {
+  MarkObject(obj_ptr);
 }
 
 void SemiSpace::VisitRoots(mirror::Object*** roots, size_t count,
@@ -628,29 +616,9 @@
   Runtime::Current()->VisitRoots(this);
 }
 
-bool SemiSpace::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* object,
-                                            void* arg) {
-  mirror::Object* obj = object->AsMirrorPtr();
-  mirror::Object* new_obj =
-      reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(obj);
-  if (new_obj == nullptr) {
-    return false;
-  }
-  if (new_obj != obj) {
-    // Write barrier is not necessary since it still points to the same object, just at a different
-    // address.
-    object->Assign(new_obj);
-  }
-  return true;
-}
-
-mirror::Object* SemiSpace::MarkedForwardingAddressCallback(mirror::Object* object, void* arg) {
-  return reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(object);
-}
-
 void SemiSpace::SweepSystemWeaks() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
-  Runtime::Current()->SweepSystemWeaks(MarkedForwardingAddressCallback, this);
+  Runtime::Current()->SweepSystemWeaks(this);
 }
 
 bool SemiSpace::ShouldSweepSpace(space::ContinuousSpace* space) const {
@@ -688,8 +656,7 @@
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
 void SemiSpace::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
-  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference,
-                                                         &HeapReferenceMarkedCallback, this);
+  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference, this);
 }
 
 class SemiSpaceMarkObjectVisitor {
@@ -746,8 +713,7 @@
   }
 }
 
-inline Object* SemiSpace::GetMarkedForwardAddress(mirror::Object* obj) const
-    SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+mirror::Object* SemiSpace::IsMarked(mirror::Object* obj) {
   // All immune objects are assumed marked.
   if (from_space_->HasAddress(obj)) {
     // Returns either the forwarding address or null.
@@ -759,6 +725,20 @@
   return mark_bitmap_->Test(obj) ? obj : nullptr;
 }
 
+bool SemiSpace::IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* object) {
+  mirror::Object* obj = object->AsMirrorPtr();
+  mirror::Object* new_obj = IsMarked(obj);
+  if (new_obj == nullptr) {
+    return false;
+  }
+  if (new_obj != obj) {
+    // Write barrier is not necessary since it still points to the same object, just at a different
+    // address.
+    object->Assign(new_obj);
+  }
+  return true;
+}
+
 void SemiSpace::SetToSpace(space::ContinuousMemMapAllocSpace* to_space) {
   DCHECK(to_space != nullptr);
   to_space_ = to_space;
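
SemiSpace::IsMarkedHeapReference() above shows the read-query-update shape that all three collectors now share. Extracted as a standalone sketch with a toy HeapReference (the real one is a compressed 32-bit reference that may be poisoned):

struct Object {};

struct HeapReference {
  Object* AsMirrorPtr() const { return ptr; }
  void Assign(Object* new_ptr) { ptr = new_ptr; }
  Object* ptr;
};

class IsMarkedVisitor {
 public:
  virtual ~IsMarkedVisitor() {}
  virtual Object* IsMarked(Object* obj) = 0;
};

bool IsMarkedHeapReference(HeapReference* ref, IsMarkedVisitor* collector) {
  Object* obj = ref->AsMirrorPtr();
  Object* new_obj = collector->IsMarked(obj);
  if (new_obj == nullptr) {
    return false;  // White: the caller may clear the referent.
  }
  if (new_obj != obj) {
    // Forwarded: fix the field in place. No write barrier is needed because
    // the logical target object is unchanged; only its address moved.
    ref->Assign(new_obj);
  }
  return true;
}
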
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 3c25f53..6b7ea0d 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -103,6 +103,12 @@
   void MarkObject(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
+  virtual mirror::Object* MarkObject(mirror::Object* root) OVERRIDE
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj_ptr) OVERRIDE
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
   void ScanObject(mirror::Object* obj)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
@@ -140,19 +146,6 @@
                           const RootInfo& info) OVERRIDE
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
-  static mirror::Object* MarkObjectCallback(mirror::Object* root, void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  static void MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* obj_ptr, void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  static void ProcessMarkStackCallback(void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
-
-  static void DelayReferenceReferentCallback(mirror::Class* klass, mirror::Reference* ref,
-                                             void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
   virtual mirror::Object* MarkNonForwardedObject(mirror::Object* obj)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
@@ -163,15 +156,11 @@
  protected:
   // Returns null if the object is not marked, otherwise returns the forwarding address (same as
   // object for non movable things).
-  mirror::Object* GetMarkedForwardAddress(mirror::Object* object) const
+  virtual mirror::Object* IsMarked(mirror::Object* object) OVERRIDE
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static bool HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* object, void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  static mirror::Object* MarkedForwardingAddressCallback(mirror::Object* object, void* arg)
+  virtual bool IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* object) OVERRIDE
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 6317351..2b94cf1 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1431,10 +1431,10 @@
   if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_.LoadRelaxed()) < 10 * KB)) {
     return;
   }
-  CHECK(IsAligned<kObjectAlignment>(obj)) << "Object isn't aligned: " << obj;
+  CHECK_ALIGNED(obj, kObjectAlignment) << "Object isn't aligned";
   mirror::Class* c = obj->GetFieldObject<mirror::Class, kVerifyNone>(mirror::Object::ClassOffset());
   CHECK(c != nullptr) << "Null class in object " << obj;
-  CHECK(IsAligned<kObjectAlignment>(c)) << "Class " << c << " not aligned in object " << obj;
+  CHECK_ALIGNED(c, kObjectAlignment) << "Class " << c << " not aligned in object " << obj;
   CHECK(VerifyClassClass(c));
 
   if (verify_object_mode_ > kVerifyObjectModeFast) {
@@ -3055,8 +3055,13 @@
   }
 }
 
-static void IdentityMarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>*, void*) {
-}
+struct IdentityMarkHeapReferenceVisitor : public MarkObjectVisitor {
+  virtual mirror::Object* MarkObject(mirror::Object* obj) OVERRIDE {
+    return obj;
+  }
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>*) OVERRIDE {
+  }
+};
 
 void Heap::PreGcVerificationPaused(collector::GarbageCollector* gc) {
   Thread* const self = Thread::Current();
@@ -3085,7 +3090,8 @@
     ReaderMutexLock reader_lock(self, *Locks::heap_bitmap_lock_);
     for (const auto& table_pair : mod_union_tables_) {
       accounting::ModUnionTable* mod_union_table = table_pair.second;
-      mod_union_table->UpdateAndMarkReferences(IdentityMarkHeapReferenceCallback, nullptr);
+      IdentityMarkHeapReferenceVisitor visitor;
+      mod_union_table->UpdateAndMarkReferences(&visitor);
       mod_union_table->Verify();
     }
   }
@@ -3714,11 +3720,11 @@
   }
 }
 
-void Heap::SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const {
+void Heap::SweepAllocationRecords(IsMarkedVisitor* visitor) const {
   if (IsAllocTrackingEnabled()) {
     MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
     if (IsAllocTrackingEnabled()) {
-      GetAllocationRecords()->SweepAllocationRecords(visitor, arg);
+      GetAllocationRecords()->SweepAllocationRecords(visitor);
     }
   }
 }
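
The unchanged SweepAllocationRecords() body above double-checks IsAllocTrackingEnabled(): once before taking alloc_tracker_lock_ to keep the common disabled path lock-free, and once after, in case tracking was turned off while the lock was being acquired. A standalone sketch of the pattern (using std::atomic and std::mutex where ART uses its own annotated Mutex):

#include <atomic>
#include <mutex>

class ToyRecordSweeper {
 public:
  void SweepAllocationRecords(/* IsMarkedVisitor* visitor */) {
    if (!enabled_.load(std::memory_order_acquire)) {
      return;  // Fast path: no lock taken while tracking is off.
    }
    std::lock_guard<std::mutex> lock(tracker_lock_);
    if (enabled_.load(std::memory_order_relaxed)) {  // Re-check under the lock.
      // ... sweep the records through the visitor ...
    }
  }

 private:
  std::atomic<bool> enabled_{false};
  std::mutex tracker_lock_;
};
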
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 2df5a4e..ee3d510 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -705,7 +705,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::alloc_tracker_lock_);
 
-  void SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const
+  void SweepAllocationRecords(IsMarkedVisitor* visitor) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::alloc_tracker_lock_);
 
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index c08ed0e..39ba743 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -17,6 +17,7 @@
 #include "reference_processor.h"
 
 #include "base/time_utils.h"
+#include "collector/garbage_collector.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference-inl.h"
@@ -34,7 +35,7 @@
 static constexpr bool kAsyncReferenceQueueAdd = false;
 
 ReferenceProcessor::ReferenceProcessor()
-    : process_references_args_(nullptr, nullptr, nullptr),
+    : collector_(nullptr),
       preserving_references_(false),
       condition_("reference processor condition", *Locks::reference_processor_lock_) ,
       soft_reference_queue_(Locks::reference_queue_soft_references_lock_),
@@ -83,16 +84,14 @@
     }
    // Try to see if the referent is already marked by asking the collector. We can return
     // it to the mutator as long as the GC is not preserving references.
-    IsHeapReferenceMarkedCallback* const is_marked_callback =
-        process_references_args_.is_marked_callback_;
-    if (LIKELY(is_marked_callback != nullptr)) {
+    if (LIKELY(collector_ != nullptr)) {
       // If it's null it means not marked, but it could become marked if the referent is reachable
       // by finalizer referents. So we can not return in this case and must block. Otherwise, we
       // can return it to the mutator as long as the GC is not preserving references, in which
       // case only black nodes can be safely returned. If the GC is preserving references, the
       // mutator could take a white field from a grey or white node and move it somewhere else
       // in the heap causing corruption since this field would get swept.
-      if (is_marked_callback(referent_addr, process_references_args_.arg_)) {
+      if (collector_->IsMarkedHeapReference(referent_addr)) {
         if (!preserving_references_ ||
            (LIKELY(!reference->IsFinalizerReferenceInstance()) && !reference->IsEnqueued())) {
           return referent_addr->AsMirrorPtr();
@@ -104,16 +103,6 @@
   return reference->GetReferent();
 }
 
-bool ReferenceProcessor::PreserveSoftReferenceCallback(mirror::HeapReference<mirror::Object>* obj,
-                                                       void* arg) {
-  auto* const args = reinterpret_cast<ProcessReferencesArgs*>(arg);
-  // TODO: Add smarter logic for preserving soft references.
-  mirror::Object* new_obj = args->mark_callback_(obj->AsMirrorPtr(), args->arg_);
-  DCHECK(new_obj != nullptr);
-  obj->Assign(new_obj);
-  return true;
-}
-
 void ReferenceProcessor::StartPreservingReferences(Thread* self) {
   MutexLock mu(self, *Locks::reference_processor_lock_);
   preserving_references_ = true;
@@ -129,17 +118,12 @@
 // Process reference class instances and schedule finalizations.
 void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timings,
                                            bool clear_soft_references,
-                                           IsHeapReferenceMarkedCallback* is_marked_callback,
-                                           MarkObjectCallback* mark_object_callback,
-                                           ProcessMarkStackCallback* process_mark_stack_callback,
-                                           void* arg) {
+                                           collector::GarbageCollector* collector) {
   TimingLogger::ScopedTiming t(concurrent ? __FUNCTION__ : "(Paused)ProcessReferences", timings);
   Thread* self = Thread::Current();
   {
     MutexLock mu(self, *Locks::reference_processor_lock_);
-    process_references_args_.is_marked_callback_ = is_marked_callback;
-    process_references_args_.mark_callback_ = mark_object_callback;
-    process_references_args_.arg_ = arg;
+    collector_ = collector;
     if (!kUseReadBarrier) {
       CHECK_EQ(SlowPathEnabled(), concurrent) << "Slow path must be enabled iff concurrent";
     } else {
@@ -154,16 +138,17 @@
     if (concurrent) {
       StartPreservingReferences(self);
     }
-    soft_reference_queue_.ForwardSoftReferences(&PreserveSoftReferenceCallback,
-                                                &process_references_args_);
-    process_mark_stack_callback(arg);
+    // TODO: Add smarter logic for preserving soft references. The behavior should be a conditional
+    // mark if the SoftReference is supposed to be preserved.
+    soft_reference_queue_.ForwardSoftReferences(collector);
+    collector->ProcessMarkStack();
     if (concurrent) {
       StopPreservingReferences(self);
     }
   }
   // Clear all remaining soft and weak references with white referents.
-  soft_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
-  weak_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
+  soft_reference_queue_.ClearWhiteReferences(&cleared_references_, collector);
+  weak_reference_queue_.ClearWhiteReferences(&cleared_references_, collector);
   {
     TimingLogger::ScopedTiming t2(concurrent ? "EnqueueFinalizerReferences" :
         "(Paused)EnqueueFinalizerReferences", timings);
@@ -171,18 +156,17 @@
       StartPreservingReferences(self);
     }
     // Preserve all white objects with finalize methods and schedule them for finalization.
-    finalizer_reference_queue_.EnqueueFinalizerReferences(&cleared_references_, is_marked_callback,
-                                                          mark_object_callback, arg);
-    process_mark_stack_callback(arg);
+    finalizer_reference_queue_.EnqueueFinalizerReferences(&cleared_references_, collector);
+    collector->ProcessMarkStack();
     if (concurrent) {
       StopPreservingReferences(self);
     }
   }
   // Clear all finalizer referent reachable soft and weak references with white referents.
-  soft_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
-  weak_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
+  soft_reference_queue_.ClearWhiteReferences(&cleared_references_, collector);
+  weak_reference_queue_.ClearWhiteReferences(&cleared_references_, collector);
   // Clear all phantom references with white referents.
-  phantom_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
+  phantom_reference_queue_.ClearWhiteReferences(&cleared_references_, collector);
   // At this point all reference queues other than the cleared references should be empty.
   DCHECK(soft_reference_queue_.IsEmpty());
   DCHECK(weak_reference_queue_.IsEmpty());
@@ -194,12 +178,10 @@
    // could result in a stale collector_ being used to check marks before the reference
    // processing starts since there is a small window of time where slow_path_enabled_ is
    // enabled but the collector isn't yet set.
-    process_references_args_.is_marked_callback_ = nullptr;
-    if (!kUseReadBarrier) {
-      if (concurrent) {
-        // Done processing, disable the slow path and broadcast to the waiters.
-        DisableSlowPath(self);
-      }
+    collector_ = nullptr;
+    if (!kUseReadBarrier && concurrent) {
+      // Done processing, disable the slow path and broadcast to the waiters.
+      DisableSlowPath(self);
     }
   }
 }
@@ -207,13 +189,12 @@
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
 void ReferenceProcessor::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref,
-                                                IsHeapReferenceMarkedCallback* is_marked_callback,
-                                                void* arg) {
+                                                collector::GarbageCollector* collector) {
   // klass can be the class of the old object if the visitor already updated the class of ref.
   DCHECK(klass != nullptr);
   DCHECK(klass->IsTypeOfReferenceClass());
   mirror::HeapReference<mirror::Object>* referent = ref->GetReferentReferenceAddr();
-  if (referent->AsMirrorPtr() != nullptr && !is_marked_callback(referent, arg)) {
+  if (referent->AsMirrorPtr() != nullptr && !collector->IsMarkedHeapReference(referent)) {
     Thread* self = Thread::Current();
     // TODO: Remove these locks, and use atomic stacks for storing references?
     // We need to check that the references haven't already been enqueued since we can end up
@@ -233,8 +214,8 @@
   }
 }
 
-void ReferenceProcessor::UpdateRoots(IsMarkedCallback* callback, void* arg) {
-  cleared_references_.UpdateRoots(callback, arg);
+void ReferenceProcessor::UpdateRoots(IsMarkedVisitor* visitor) {
+  cleared_references_.UpdateRoots(visitor);
 }
 
 class ClearedReferenceTask : public HeapTask {
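
After this change, ProcessReferences() drives the whole java.lang.ref pipeline through the collector virtuals. A condensed, compilable outline of the phase ordering with stubbed toy queues:

struct GarbageCollector {
  virtual ~GarbageCollector() {}
  virtual void ProcessMarkStack() {}
};

struct ToyQueue {
  void ForwardSoftReferences(GarbageCollector*) {}
  void EnqueueFinalizerReferences(ToyQueue*, GarbageCollector*) {}
  void ClearWhiteReferences(ToyQueue*, GarbageCollector*) {}
};

void ProcessReferencesOutline(GarbageCollector* collector, ToyQueue& soft,
                              ToyQueue& weak, ToyQueue& finalizer,
                              ToyQueue& phantom, ToyQueue& cleared) {
  soft.ForwardSoftReferences(collector);            // MarkHeapReference per referent.
  collector->ProcessMarkStack();                    // Finish transitive marking.
  soft.ClearWhiteReferences(&cleared, collector);   // IsMarkedHeapReference tests.
  weak.ClearWhiteReferences(&cleared, collector);
  finalizer.EnqueueFinalizerReferences(&cleared, collector);  // MarkObject resurrects.
  collector->ProcessMarkStack();
  soft.ClearWhiteReferences(&cleared, collector);   // Re-clear after resurrection.
  weak.ClearWhiteReferences(&cleared, collector);
  phantom.ClearWhiteReferences(&cleared, collector);
}
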
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index 284d13c..95877d1 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -28,6 +28,7 @@
 class TimingLogger;
 
 namespace mirror {
+class Class;
 class FinalizerReference;
 class Object;
 class Reference;
@@ -35,18 +36,18 @@
 
 namespace gc {
 
+namespace collector {
+class GarbageCollector;
+}  // namespace collector
+
 class Heap;
 
 // Used to process java.lang.References concurrently or paused.
 class ReferenceProcessor {
  public:
   explicit ReferenceProcessor();
-  static bool PreserveSoftReferenceCallback(mirror::HeapReference<mirror::Object>* obj, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ProcessReferences(bool concurrent, TimingLogger* timings, bool clear_soft_references,
-                         IsHeapReferenceMarkedCallback* is_marked_callback,
-                         MarkObjectCallback* mark_object_callback,
-                         ProcessMarkStackCallback* process_mark_stack_callback, void* arg)
+                         gc::collector::GarbageCollector* collector)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       LOCKS_EXCLUDED(Locks::reference_processor_lock_);
@@ -60,9 +61,9 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::reference_processor_lock_);
   void EnqueueClearedReferences(Thread* self) LOCKS_EXCLUDED(Locks::mutator_lock_);
   void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref,
-                              IsHeapReferenceMarkedCallback* is_marked_callback, void* arg)
+                              collector::GarbageCollector* collector)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void UpdateRoots(IsMarkedCallback* callback, void* arg)
+  void UpdateRoots(IsMarkedVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
   // Make a circular list with reference if it is not enqueued. Uses the finalizer queue lock.
   bool MakeCircularListIfUnenqueued(mirror::FinalizerReference* reference)
@@ -71,21 +72,6 @@
                      Locks::reference_queue_finalizer_references_lock_);
 
  private:
-  class ProcessReferencesArgs {
-   public:
-    ProcessReferencesArgs(IsHeapReferenceMarkedCallback* is_marked_callback,
-                          MarkObjectCallback* mark_callback, void* arg)
-        : is_marked_callback_(is_marked_callback), mark_callback_(mark_callback), arg_(arg) {
-    }
-
-    // The is marked callback is null when the args aren't set up.
-    IsHeapReferenceMarkedCallback* is_marked_callback_;
-    MarkObjectCallback* mark_callback_;
-    void* arg_;
-
-   private:
-    DISALLOW_IMPLICIT_CONSTRUCTORS(ProcessReferencesArgs);
-  };
   bool SlowPathEnabled() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Called by ProcessReferences.
   void DisableSlowPath(Thread* self) EXCLUSIVE_LOCKS_REQUIRED(Locks::reference_processor_lock_)
@@ -95,8 +81,9 @@
   // referents.
   void StartPreservingReferences(Thread* self) LOCKS_EXCLUDED(Locks::reference_processor_lock_);
   void StopPreservingReferences(Thread* self) LOCKS_EXCLUDED(Locks::reference_processor_lock_);
-  // Process args, used by the GetReferent to return referents which are already marked.
-  ProcessReferencesArgs process_references_args_ GUARDED_BY(Locks::reference_processor_lock_);
+  // Collector which is clearing references, used by the GetReferent to return referents which are
+  // already marked.
+  collector::GarbageCollector* collector_ GUARDED_BY(Locks::reference_processor_lock_);
   // Boolean for whether or not we are preserving references (either soft references or finalizers).
   // If this is true, then we cannot return a referent (see comment in GetReferent).
   bool preserving_references_ GUARDED_BY(Locks::reference_processor_lock_);
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 4ba3983..f505428 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -137,12 +137,12 @@
 }
 
 void ReferenceQueue::ClearWhiteReferences(ReferenceQueue* cleared_references,
-                                          IsHeapReferenceMarkedCallback* preserve_callback,
-                                          void* arg) {
+                                          collector::GarbageCollector* collector) {
   while (!IsEmpty()) {
     mirror::Reference* ref = DequeuePendingReference();
     mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr();
-    if (referent_addr->AsMirrorPtr() != nullptr && !preserve_callback(referent_addr, arg)) {
+    if (referent_addr->AsMirrorPtr() != nullptr &&
+        !collector->IsMarkedHeapReference(referent_addr)) {
       // Referent is white, clear it.
       if (Runtime::Current()->IsActiveTransaction()) {
         ref->ClearReferent<true>();
@@ -157,14 +157,13 @@
 }
 
 void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue* cleared_references,
-                                                IsHeapReferenceMarkedCallback* is_marked_callback,
-                                                MarkObjectCallback* mark_object_callback,
-                                                void* arg) {
+                                                collector::GarbageCollector* collector) {
   while (!IsEmpty()) {
     mirror::FinalizerReference* ref = DequeuePendingReference()->AsFinalizerReference();
     mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr();
-    if (referent_addr->AsMirrorPtr() != nullptr && !is_marked_callback(referent_addr, arg)) {
-      mirror::Object* forward_address = mark_object_callback(referent_addr->AsMirrorPtr(), arg);
+    if (referent_addr->AsMirrorPtr() != nullptr &&
+        !collector->IsMarkedHeapReference(referent_addr)) {
+      mirror::Object* forward_address = collector->MarkObject(referent_addr->AsMirrorPtr());
      // If the referent is non-null the reference must be queuable.
       DCHECK(ref->IsEnqueuable());
       // Move the updated referent to the zombie field.
@@ -180,8 +179,7 @@
   }
 }
 
-void ReferenceQueue::ForwardSoftReferences(IsHeapReferenceMarkedCallback* preserve_callback,
-                                           void* arg) {
+void ReferenceQueue::ForwardSoftReferences(MarkObjectVisitor* visitor) {
   if (UNLIKELY(IsEmpty())) {
     return;
   }
@@ -190,15 +188,15 @@
   do {
     mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr();
     if (referent_addr->AsMirrorPtr() != nullptr) {
-      UNUSED(preserve_callback(referent_addr, arg));
+      visitor->MarkHeapReference(referent_addr);
     }
     ref = ref->GetPendingNext();
   } while (LIKELY(ref != head));
 }
 
-void ReferenceQueue::UpdateRoots(IsMarkedCallback* callback, void* arg) {
+void ReferenceQueue::UpdateRoots(IsMarkedVisitor* visitor) {
   if (list_ != nullptr) {
-    list_ = down_cast<mirror::Reference*>(callback(list_, arg));
+    list_ = down_cast<mirror::Reference*>(visitor->IsMarked(list_));
   }
 }
 
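
For orientation, a hypothetical collector-side implementation of the two queries the rewritten queue code calls. Real collectors keep their existing marking logic and only gain the virtual dispatch; the bitmap member below is an assumption, and the class is not compilable as-is since GarbageCollector has further pure-virtual methods:

    class ToyCollector : public collector::GarbageCollector {
     public:
      mirror::Object* IsMarked(mirror::Object* obj) OVERRIDE
          SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
        return mark_bitmap_->Test(obj) ? obj : nullptr;  // null == white
      }
      bool IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* ref) OVERRIDE
          SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
        mirror::Object* forwarded = IsMarked(ref->AsMirrorPtr());
        if (forwarded == nullptr) {
          return false;  // Referent is white; the caller may clear it.
        }
        ref->Assign(forwarded);  // Fix up the slot if the object moved.
        return true;
      }
     private:
      accounting::ContinuousSpaceBitmap* mark_bitmap_;  // assumed member
    };
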
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index c45be85..7d9ddf6 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -36,6 +36,10 @@
 
 namespace gc {
 
+namespace collector {
+class GarbageCollector;
+}  // namespace collector
+
 class Heap;
 
 // Used to temporarily store java.lang.ref.Reference(s) during GC and prior to queueing on the
@@ -65,20 +69,19 @@
   // Enqueues finalizer references with white referents.  White referents are blackened, moved to
   // the zombie field, and the referent field is cleared.
   void EnqueueFinalizerReferences(ReferenceQueue* cleared_references,
-                                  IsHeapReferenceMarkedCallback* is_marked_callback,
-                                  MarkObjectCallback* mark_object_callback, void* arg)
+                                  collector::GarbageCollector* collector)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Walks the reference list marking any references subject to the reference clearing policy.
   // References with a black referent are removed from the list.  References with white referents
   // biased toward saving are blackened and also removed from the list.
-  void ForwardSoftReferences(IsHeapReferenceMarkedCallback* preserve_callback, void* arg)
+  void ForwardSoftReferences(MarkObjectVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Unlink the reference list clearing references objects with white referents. Cleared references
   // registered to a reference queue are scheduled for appending by the heap worker thread.
   void ClearWhiteReferences(ReferenceQueue* cleared_references,
-                            IsHeapReferenceMarkedCallback* is_marked_callback, void* arg)
+                            collector::GarbageCollector* collector)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void Dump(std::ostream& os) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -95,7 +98,7 @@
   }
 
   // Visits list_, currently only used for the mark compact GC.
-  void UpdateRoots(IsMarkedCallback* callback, void* arg)
+  void UpdateRoots(IsMarkedVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h
index d9ad9a3..338a41e 100644
--- a/runtime/gc/space/bump_pointer_space-inl.h
+++ b/runtime/gc/space/bump_pointer_space-inl.h
@@ -63,7 +63,7 @@
 }
 
 inline mirror::Object* BumpPointerSpace::AllocNonvirtualWithoutAccounting(size_t num_bytes) {
-  DCHECK(IsAligned<kAlignment>(num_bytes));
+  DCHECK_ALIGNED(num_bytes, kAlignment);
   uint8_t* old_end;
   uint8_t* new_end;
   do {
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index a913e59..2798b21 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -440,7 +440,7 @@
       AllocationInfo* next_next_info = next_info->GetNextInfo();
       // Next next info can't be free since we always coalesce.
       DCHECK(!next_next_info->IsFree());
-      DCHECK(IsAligned<kAlignment>(next_next_info->ByteSize()));
+      DCHECK_ALIGNED(next_next_info->ByteSize(), kAlignment);
       new_free_info = next_next_info;
       new_free_size += next_next_info->GetPrevFreeBytes();
       RemoveFreePrev(next_next_info);
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index b014217..3a0d814 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -46,8 +46,8 @@
   if (create_bitmaps) {
     size_t bitmap_index = bitmap_index_++;
     static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
-    CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
-    CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
+    CHECK_ALIGNED(reinterpret_cast<uintptr_t>(mem_map->Begin()), kGcCardSize);
+    CHECK_ALIGNED(reinterpret_cast<uintptr_t>(mem_map->End()), kGcCardSize);
     live_bitmap_.reset(accounting::ContinuousSpaceBitmap::Create(
         StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
         Begin(), NonGrowthLimitCapacity()));
@@ -164,10 +164,10 @@
   // alloc spaces.
   RevokeAllThreadLocalBuffers();
   SetEnd(reinterpret_cast<uint8_t*>(RoundUp(reinterpret_cast<uintptr_t>(End()), kPageSize)));
-  DCHECK(IsAligned<accounting::CardTable::kCardSize>(begin_));
-  DCHECK(IsAligned<accounting::CardTable::kCardSize>(End()));
-  DCHECK(IsAligned<kPageSize>(begin_));
-  DCHECK(IsAligned<kPageSize>(End()));
+  DCHECK_ALIGNED(begin_, accounting::CardTable::kCardSize);
+  DCHECK_ALIGNED(End(), accounting::CardTable::kCardSize);
+  DCHECK_ALIGNED(begin_, kPageSize);
+  DCHECK_ALIGNED(End(), kPageSize);
   size_t size = RoundUp(Size(), kPageSize);
   // Trimming the heap should be done by the caller since we may have invalidated the accounting
   // stored in between objects.
diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h
index 1cdf69d..db005f7 100644
--- a/runtime/gc/space/region_space-inl.h
+++ b/runtime/gc/space/region_space-inl.h
@@ -43,7 +43,7 @@
 inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* bytes_allocated,
                                                     size_t* usable_size,
                                                     size_t* bytes_tl_bulk_allocated) {
-  DCHECK(IsAligned<kAlignment>(num_bytes));
+  DCHECK_ALIGNED(num_bytes, kAlignment);
   mirror::Object* obj;
   if (LIKELY(num_bytes <= kRegionSize)) {
     // Non-large object.
@@ -115,7 +115,7 @@
                                                   size_t* usable_size,
                                                   size_t* bytes_tl_bulk_allocated) {
   DCHECK(IsAllocated() && IsInToSpace());
-  DCHECK(IsAligned<kAlignment>(num_bytes));
+  DCHECK_ALIGNED(num_bytes, kAlignment);
   Atomic<uint8_t*>* atomic_top = reinterpret_cast<Atomic<uint8_t*>*>(&top_);
   uint8_t* old_top;
   uint8_t* new_top;
@@ -266,7 +266,7 @@
 mirror::Object* RegionSpace::AllocLarge(size_t num_bytes, size_t* bytes_allocated,
                                         size_t* usable_size,
                                         size_t* bytes_tl_bulk_allocated) {
-  DCHECK(IsAligned<kAlignment>(num_bytes));
+  DCHECK_ALIGNED(num_bytes, kAlignment);
   DCHECK_GT(num_bytes, kRegionSize);
   size_t num_regs = RoundUp(num_bytes, kRegionSize) / kRegionSize;
   DCHECK_GT(num_regs, 0U);
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 814ab6c..9a2d0c6 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -287,7 +287,7 @@
 
 void RegionSpace::FreeLarge(mirror::Object* large_obj, size_t bytes_allocated) {
   DCHECK(Contains(large_obj));
-  DCHECK(IsAligned<kRegionSize>(large_obj));
+  DCHECK_ALIGNED(large_obj, kRegionSize);
   MutexLock mu(Thread::Current(), region_lock_);
   uint8_t* begin_addr = reinterpret_cast<uint8_t*>(large_obj);
   uint8_t* end_addr = AlignUp(reinterpret_cast<uint8_t*>(large_obj) + bytes_allocated, kRegionSize);
@@ -366,7 +366,7 @@
   uint8_t* tlab_start = thread->GetTlabStart();
   DCHECK_EQ(thread->HasTlab(), tlab_start != nullptr);
   if (tlab_start != nullptr) {
-    DCHECK(IsAligned<kRegionSize>(tlab_start));
+    DCHECK_ALIGNED(tlab_start, kRegionSize);
     Region* r = RefToRegionLocked(reinterpret_cast<mirror::Object*>(tlab_start));
     DCHECK(r->IsAllocated());
     DCHECK_EQ(thread->GetThreadLocalBytesAllocated(), kRegionSize);
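
The mechanical DCHECK(IsAligned<k>(x)) to DCHECK_ALIGNED(x, k) conversions across the files above are behavior-preserving; the macro form also prints the offending value on failure, whereas the wrapped boolean only reports false. Roughly, assuming the helpers have this shape (see base/macros.h for the real definitions):

    template <int n, typename T>
    static inline bool IsAligned(T x) {
      static_assert((n & (n - 1)) == 0, "n is not a power of two");
      return (x & (n - 1)) == 0;
    }
    #define DCHECK_ALIGNED(value, alignment) \
      DCHECK(::art::IsAligned<alignment>(value)) << reinterpret_cast<const void*>(value)
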
diff --git a/runtime/gc/weak_root_state.h b/runtime/gc/weak_root_state.h
new file mode 100644
index 0000000..b66f19d
--- /dev/null
+++ b/runtime/gc/weak_root_state.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_WEAK_ROOT_STATE_H_
+#define ART_RUNTIME_GC_WEAK_ROOT_STATE_H_
+
+#include <iosfwd>
+
+namespace art {
+namespace gc {
+
+enum WeakRootState {
+  // Can read or add weak roots.
+  kWeakRootStateNormal,
+  // Need to wait until we can read weak roots.
+  kWeakRootStateNoReadsOrWrites,
+  // Need to mark new weak roots to make sure they don't get swept.
+  kWeakRootStateMarkNewRoots,
+};
+
+std::ostream& operator<<(std::ostream& os, const WeakRootState&);
+
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_WEAK_ROOT_STATE_H_
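
The new enum replaces a plain allow/disallow boolean with a three-state protocol for system weaks. A hedged sketch of the lifecycle the intern-table changes below implement; the exact collector call sites are an assumption:

    InternTable* intern_table = Runtime::Current()->GetInternTable();
    // GC pause: block mutators from reading or publishing weak interns.
    intern_table->ChangeWeakRootState(gc::kWeakRootStateNoReadsOrWrites);
    // ... marking runs; InternTable::Insert callers block in
    // WaitUntilAccessible() until the state changes back ...
    // Sweeping flips the state back to normal and broadcasts to waiters:
    intern_table->SweepInternTableWeaks(visitor);  // ends in kWeakRootStateNormal

kWeakRootStateMarkNewRoots is declared here but the Insert path below still rejects it with a DCHECK, so only the first two states are exercised by this patch.
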
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 2a06ab3..ae521b1 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -19,7 +19,9 @@
 #include <memory>
 
 #include "gc_root-inl.h"
+#include "gc/collector/garbage_collector.h"
 #include "gc/space/image_space.h"
+#include "gc/weak_root_state.h"
 #include "mirror/dex_cache.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
@@ -31,8 +33,8 @@
 
 InternTable::InternTable()
     : image_added_to_intern_table_(false), log_new_roots_(false),
-      allow_new_interns_(true),
-      new_intern_condition_("New intern condition", *Locks::intern_table_lock_) {
+      weak_intern_condition_("New intern condition", *Locks::intern_table_lock_),
+      weak_root_state_(gc::kWeakRootStateNormal) {
 }
 
 size_t InternTable::Size() const {
@@ -88,6 +90,7 @@
 }
 
 mirror::String* InternTable::LookupWeak(mirror::String* s) {
+  // TODO: Return only if marked.
   return weak_interns_.Find(s);
 }
 
@@ -182,8 +185,7 @@
   }
 }
 
-mirror::String* InternTable::LookupStringFromImage(mirror::String* s)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+mirror::String* InternTable::LookupStringFromImage(mirror::String* s) {
   if (image_added_to_intern_table_) {
     return nullptr;
   }
@@ -211,48 +213,61 @@
   return nullptr;
 }
 
-void InternTable::AllowNewInterns() {
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::intern_table_lock_);
-  allow_new_interns_ = true;
-  new_intern_condition_.Broadcast(self);
-}
-
-void InternTable::DisallowNewInterns() {
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::intern_table_lock_);
-  allow_new_interns_ = false;
-}
-
-void InternTable::EnsureNewInternsDisallowed() {
+void InternTable::EnsureNewWeakInternsDisallowed() {
   // Lock and unlock once to ensure that no threads are still in the
   // middle of adding new interns.
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  CHECK(!allow_new_interns_);
+  CHECK_EQ(weak_root_state_, gc::kWeakRootStateNoReadsOrWrites);
 }
 
 void InternTable::BroadcastForNewInterns() {
   CHECK(kUseReadBarrier);
   Thread* self = Thread::Current();
   MutexLock mu(self, *Locks::intern_table_lock_);
-  new_intern_condition_.Broadcast(self);
+  weak_intern_condition_.Broadcast(self);
 }
 
-mirror::String* InternTable::Insert(mirror::String* s, bool is_strong) {
+void InternTable::WaitUntilAccessible(Thread* self) {
+  Locks::intern_table_lock_->ExclusiveUnlock(self);
+  self->TransitionFromRunnableToSuspended(kWaitingWeakRootRead);
+  Locks::intern_table_lock_->ExclusiveLock(self);
+  while (weak_root_state_ == gc::kWeakRootStateNoReadsOrWrites) {
+    weak_intern_condition_.Wait(self);
+  }
+  Locks::intern_table_lock_->ExclusiveUnlock(self);
+  self->TransitionFromSuspendedToRunnable();
+  Locks::intern_table_lock_->ExclusiveLock(self);
+}
+
+mirror::String* InternTable::Insert(mirror::String* s, bool is_strong, bool holding_locks) {
   if (s == nullptr) {
     return nullptr;
   }
-  Thread* self = Thread::Current();
+  Thread* const self = Thread::Current();
   MutexLock mu(self, *Locks::intern_table_lock_);
-  while (UNLIKELY((!kUseReadBarrier && !allow_new_interns_) ||
-                  (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
-    new_intern_condition_.WaitHoldingLocks(self);
+  if (kDebugLocking && !holding_locks) {
+    Locks::mutator_lock_->AssertSharedHeld(self);
+    CHECK_EQ(2u, self->NumberOfHeldMutexes()) << "may only safely hold the mutator lock";
   }
-  // Check the strong table for a match.
-  mirror::String* strong = LookupStrong(s);
-  if (strong != nullptr) {
-    return strong;
+  while (true) {
+    // Check the strong table for a match.
+    mirror::String* strong = LookupStrong(s);
+    if (strong != nullptr) {
+      return strong;
+    }
+    // weak_root_state_ is set to gc::kWeakRootStateNoReadsOrWrites in the GC pause but is only
+    // cleared after SweepSystemWeaks has completed. This is why we need to wait until it is
+    // cleared.
+    if (weak_root_state_ != gc::kWeakRootStateNoReadsOrWrites) {
+      break;
+    }
+    CHECK(!holding_locks);
+    StackHandleScope<1> hs(self);
+    auto h = hs.NewHandleWrapper(&s);
+    WaitUntilAccessible(self);
   }
+  CHECK_NE(weak_root_state_, gc::kWeakRootStateNoReadsOrWrites);
+  DCHECK_NE(weak_root_state_, gc::kWeakRootStateMarkNewRoots) << "Unsupported";
   // There is no match in the strong table, check the weak table.
   mirror::String* weak = LookupWeak(s);
   if (weak != nullptr) {
@@ -283,12 +298,17 @@
   return InternStrong(mirror::String::AllocFromModifiedUtf8(Thread::Current(), utf8_data));
 }
 
+mirror::String* InternTable::InternImageString(mirror::String* s) {
+  // May be holding the heap bitmap lock.
+  return Insert(s, true, true);
+}
+
 mirror::String* InternTable::InternStrong(mirror::String* s) {
-  return Insert(s, true);
+  return Insert(s, true, false);
 }
 
 mirror::String* InternTable::InternWeak(mirror::String* s) {
-  return Insert(s, false);
+  return Insert(s, false, false);
 }
 
 bool InternTable::ContainsWeak(mirror::String* s) {
@@ -296,9 +316,11 @@
   return LookupWeak(s) == s;
 }
 
-void InternTable::SweepInternTableWeaks(IsMarkedCallback* callback, void* arg) {
+void InternTable::SweepInternTableWeaks(IsMarkedVisitor* visitor) {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  weak_interns_.SweepWeaks(callback, arg);
+  weak_interns_.SweepWeaks(visitor);
+  // Done sweeping, back to a normal state.
+  ChangeWeakRootStateLocked(gc::kWeakRootStateNormal);
 }
 
 void InternTable::AddImageInternTable(gc::space::ImageSpace* image_space) {
@@ -401,16 +423,16 @@
   }
 }
 
-void InternTable::Table::SweepWeaks(IsMarkedCallback* callback, void* arg) {
-  SweepWeaks(&pre_zygote_table_, callback, arg);
-  SweepWeaks(&post_zygote_table_, callback, arg);
+void InternTable::Table::SweepWeaks(IsMarkedVisitor* visitor) {
+  SweepWeaks(&pre_zygote_table_, visitor);
+  SweepWeaks(&post_zygote_table_, visitor);
 }
 
-void InternTable::Table::SweepWeaks(UnorderedSet* set, IsMarkedCallback* callback, void* arg) {
+void InternTable::Table::SweepWeaks(UnorderedSet* set, IsMarkedVisitor* visitor) {
   for (auto it = set->begin(), end = set->end(); it != end;) {
     // This does not need a read barrier because this is called by GC.
     mirror::Object* object = it->Read<kWithoutReadBarrier>();
-    mirror::Object* new_object = callback(object, arg);
+    mirror::Object* new_object = visitor->IsMarked(object);
     if (new_object == nullptr) {
       it = set->Erase(it);
     } else {
@@ -424,4 +446,16 @@
   return pre_zygote_table_.Size() + post_zygote_table_.Size();
 }
 
+void InternTable::ChangeWeakRootState(gc::WeakRootState new_state) {
+  MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
+  ChangeWeakRootStateLocked(new_state);
+}
+
+void InternTable::ChangeWeakRootStateLocked(gc::WeakRootState new_state) {
+  weak_root_state_ = new_state;
+  if (new_state != gc::kWeakRootStateNoReadsOrWrites) {
+    weak_intern_condition_.Broadcast(Thread::Current());
+  }
+}
+
 }  // namespace art
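
The heart of this file's change is the rewritten Insert: the old wait-then-lookup sequence becomes a retry loop, since the string may move while the thread is suspended waiting for weak access. Condensed for orientation (not a compilable excerpt):

    // Condensed control flow of InternTable::Insert, per the hunk above:
    while (true) {
      if (mirror::String* strong = LookupStrong(s)) {
        return strong;                    // strong table reads never block
      }
      if (weak_root_state_ != gc::kWeakRootStateNoReadsOrWrites) {
        break;                            // weak reads allowed; fall through
      }
      CHECK(!holding_locks);              // callers holding GC locks must not block
      StackHandleScope<1> hs(self);
      auto h = hs.NewHandleWrapper(&s);   // keeps 's' updated across suspension
      WaitUntilAccessible(self);          // releases and re-takes the table lock
    }
    // Weak-table lookup and promotion to strong proceed from here.
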
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 53f6f75..ef08d74 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -19,10 +19,12 @@
 
 #include <unordered_set>
 
+#include "atomic.h"
 #include "base/allocator.h"
 #include "base/hash_set.h"
 #include "base/mutex.h"
 #include "gc_root.h"
+#include "gc/weak_root_state.h"
 #include "object_callbacks.h"
 
 namespace art {
@@ -54,21 +56,25 @@
  public:
   InternTable();
 
-  // Interns a potentially new string in the 'strong' table. (See above.)
+  // Interns a potentially new string in the 'strong' table. May cause thread suspension.
   mirror::String* InternStrong(int32_t utf16_length, const char* utf8_data)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Interns a potentially new string in the 'strong' table. (See above.)
+  // Only used by image writer.
+  mirror::String* InternImageString(mirror::String* s)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Interns a potentially new string in the 'strong' table. May cause thread suspension.
   mirror::String* InternStrong(const char* utf8_data)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Interns a potentially new string in the 'strong' table. (See above.)
+  // Interns a potentially new string in the 'strong' table. May cause thread suspension.
   mirror::String* InternStrong(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Interns a potentially new string in the 'weak' table. (See above.)
+  // Interns a potentially new string in the 'weak' table. May cause thread suspension.
   mirror::String* InternWeak(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SweepInternTableWeaks(IsMarkedCallback* callback, void* arg)
+  void SweepInternTableWeaks(IsMarkedVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool ContainsWeak(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -89,6 +95,7 @@
   void AllowNewInterns() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void EnsureNewInternsDisallowed() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void BroadcastForNewInterns() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void EnsureNewWeakInternsDisallowed() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Adds all of the resolved image strings from the image space into the intern table. The
   // advantage of doing this is preventing expensive DexFile::FindStringId calls.
@@ -112,6 +119,10 @@
   size_t WriteToMemory(uint8_t* ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::intern_table_lock_);
 
+  // Change the weak root state. May broadcast to waiters.
+  void ChangeWeakRootState(gc::WeakRootState new_state)
+      LOCKS_EXCLUDED(Locks::intern_table_lock_);
+
  private:
   class StringHashEquals {
    public:
@@ -143,7 +154,7 @@
     void VisitRoots(RootVisitor* visitor)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
-    void SweepWeaks(IsMarkedCallback* callback, void* arg)
+    void SweepWeaks(IsMarkedVisitor* visitor)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
     void SwapPostZygoteWithPreZygote() EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
@@ -163,7 +174,7 @@
     typedef HashSet<GcRoot<mirror::String>, GcRootEmptyFn, StringHashEquals, StringHashEquals,
         TrackingAllocator<GcRoot<mirror::String>, kAllocatorTagInternTable>> UnorderedSet;
 
-    void SweepWeaks(UnorderedSet* set, IsMarkedCallback* callback, void* arg)
+    void SweepWeaks(UnorderedSet* set, IsMarkedVisitor* visitor)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
 
@@ -176,7 +187,7 @@
   };
 
   // Insert if non null, otherwise return null.
-  mirror::String* Insert(mirror::String* s, bool is_strong)
+  mirror::String* Insert(mirror::String* s, bool is_strong, bool holding_locks)
       LOCKS_EXCLUDED(Locks::intern_table_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -221,10 +232,17 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Change the weak root state. May broadcast to waiters.
+  void ChangeWeakRootStateLocked(gc::WeakRootState new_state)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
+
+  // Wait until we can read weak roots.
+  void WaitUntilAccessible(Thread* self) EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   bool image_added_to_intern_table_ GUARDED_BY(Locks::intern_table_lock_);
   bool log_new_roots_ GUARDED_BY(Locks::intern_table_lock_);
-  bool allow_new_interns_ GUARDED_BY(Locks::intern_table_lock_);
-  ConditionVariable new_intern_condition_ GUARDED_BY(Locks::intern_table_lock_);
+  ConditionVariable weak_intern_condition_ GUARDED_BY(Locks::intern_table_lock_);
   // Since this contains (strong) roots, they need a read barrier to
   // enable concurrent intern table (strong) root scan. Do not
   // directly access the strings in it. Use functions that contain
@@ -236,6 +254,8 @@
   // not directly access the strings in it. Use functions that contain
   // read barriers.
   Table weak_interns_ GUARDED_BY(Locks::intern_table_lock_);
+  // Weak root state, used for concurrent system weak processing and more.
+  gc::WeakRootState weak_root_state_ GUARDED_BY(Locks::intern_table_lock_);
 };
 
 }  // namespace art
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index 194d0af..c987180 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -60,9 +60,9 @@
   EXPECT_EQ(2U, t.Size());
 }
 
-class TestPredicate {
+class TestPredicate : public IsMarkedVisitor {
  public:
-  bool IsMarked(const mirror::Object* s) const {
+  mirror::Object* IsMarked(mirror::Object* s) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     bool erased = false;
     for (auto it = expected_.begin(), end = expected_.end(); it != end; ++it) {
       if (*it == s) {
@@ -72,7 +72,7 @@
       }
     }
     EXPECT_TRUE(erased);
-    return false;
+    return nullptr;
   }
 
   void Expect(const mirror::String* s) {
@@ -87,13 +87,6 @@
   mutable std::vector<const mirror::String*> expected_;
 };
 
-mirror::Object* IsMarkedSweepingCallback(mirror::Object* object, void* arg) {
-  if (reinterpret_cast<TestPredicate*>(arg)->IsMarked(object)) {
-    return object;
-  }
-  return nullptr;
-}
-
 TEST_F(InternTableTest, SweepInternTableWeaks) {
   ScopedObjectAccess soa(Thread::Current());
   InternTable t;
@@ -115,7 +108,7 @@
   p.Expect(s1.Get());
   {
     ReaderMutexLock mu(soa.Self(), *Locks::heap_bitmap_lock_);
-    t.SweepInternTableWeaks(IsMarkedSweepingCallback, &p);
+    t.SweepInternTableWeaks(&p);
   }
 
   EXPECT_EQ(2U, t.Size());
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index a12a58d..9babb18 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -34,6 +34,7 @@
 #include "dex_instruction-inl.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "handle_scope-inl.h"
+#include "lambda/box_table.h"
 #include "mirror/class-inl.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
@@ -446,10 +447,10 @@
     return 3;
   }
   const int32_t* keys = reinterpret_cast<const int32_t*>(&switch_data[2]);
-  DCHECK(IsAligned<4>(keys));
+  DCHECK_ALIGNED(keys, 4);
   int32_t first_key = keys[0];
   const int32_t* targets = reinterpret_cast<const int32_t*>(&switch_data[4]);
-  DCHECK(IsAligned<4>(targets));
+  DCHECK_ALIGNED(targets, 4);
   int32_t index = test_val - first_key;
   if (index >= 0 && index < size) {
     return targets[index];
@@ -474,9 +475,9 @@
     return 3;
   }
   const int32_t* keys = reinterpret_cast<const int32_t*>(&switch_data[2]);
-  DCHECK(IsAligned<4>(keys));
+  DCHECK_ALIGNED(keys, 4);
   const int32_t* entries = keys + size;
-  DCHECK(IsAligned<4>(entries));
+  DCHECK_ALIGNED(entries, 4);
   int lo = 0;
   int hi = size - 1;
   while (lo <= hi) {
@@ -506,8 +507,8 @@
   uint32_t vreg_target_object = inst->VRegA_22x(inst_data);
   uint32_t vreg_source_closure = inst->VRegB_22x();
 
-  ArtMethod* const closure_method = ReadLambdaClosureFromVRegsOrThrow(shadow_frame,
-                                                                      vreg_source_closure);
+  ArtMethod* closure_method = ReadLambdaClosureFromVRegsOrThrow(shadow_frame,
+                                                                vreg_source_closure);
 
   // Failed lambda target runtime check, an exception was raised.
   if (UNLIKELY(closure_method == nullptr)) {
@@ -515,28 +516,21 @@
     return false;
   }
 
-  // Convert the ArtMethod into a java.lang.reflect.Method which will serve
-  // as the temporary 'boxed' version of the lambda. This is good enough
-  // to check all the basic object identities that a boxed lambda must retain.
+  mirror::Object* closure_as_object =
+      Runtime::Current()->GetLambdaBoxTable()->BoxLambda(closure_method);
 
-  // TODO: Boxing an innate lambda (i.e. made with create-lambda) should make a proxy class
-  // TODO: Boxing a learned lambda (i.e. made with unbox-lambda) should return the original object
-  // TODO: Repeated boxing should return the same object reference
-  mirror::Method* method_as_object =
-      mirror::Method::CreateFromArtMethod(self, closure_method);
-
-  if (UNLIKELY(method_as_object == nullptr)) {
-    // Most likely an OOM has occurred.
+  // Failed to box the lambda, an exception was raised.
+  if (UNLIKELY(closure_as_object == nullptr)) {
     CHECK(self->IsExceptionPending());
     return false;
   }
 
-  shadow_frame.SetVRegReference(vreg_target_object, method_as_object);
+  shadow_frame.SetVRegReference(vreg_target_object, closure_as_object);
   return true;
 }
 
 template <bool _do_check> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-static inline bool DoUnboxLambda(Thread* self ATTRIBUTE_UNUSED,
+static inline bool DoUnboxLambda(Thread* self,
                                  ShadowFrame& shadow_frame,
                                  const Instruction* inst,
                                  uint16_t inst_data) {
@@ -556,23 +550,15 @@
     return false;
   }
 
-  // Raise ClassCastException if object is not instanceof java.lang.reflect.Method
-  if (UNLIKELY(!boxed_closure_object->InstanceOf(mirror::Method::StaticClass()))) {
-    ThrowClassCastException(mirror::Method::StaticClass(), boxed_closure_object->GetClass());
+  ArtMethod* unboxed_closure = nullptr;
+  // Raise an exception if unboxing fails.
+  if (!Runtime::Current()->GetLambdaBoxTable()->UnboxLambda(boxed_closure_object,
+                                                            &unboxed_closure)) {
+    CHECK(self->IsExceptionPending());
     return false;
   }
 
-  // TODO(iam): We must check that the closure object extends/implements the type
-  // specified in [type id]. This is not currently implemented since it's always a Method.
-
-  // If we got this far, the inputs are valid.
-  // Write out the java.lang.reflect.Method's embedded ArtMethod* into the vreg target.
-  mirror::AbstractMethod* boxed_closure_as_method =
-      down_cast<mirror::AbstractMethod*>(boxed_closure_object);
-
-  ArtMethod* unboxed_closure = boxed_closure_as_method->GetArtMethod();
   DCHECK(unboxed_closure != nullptr);
-
   WriteLambdaClosureIntoVRegs(shadow_frame, *unboxed_closure, vreg_target_closure);
   return true;
 }
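
Both opcodes now delegate to the runtime-wide lambda::BoxTable added later in this patch. The property the interpreter relies on is referential equality across repeated boxing, which the old CreateFromArtMethod-per-box code could not provide. A hypothetical illustration of that contract, where closure_method stands for any unboxed closure:

    lambda::BoxTable* table = Runtime::Current()->GetLambdaBoxTable();
    mirror::Object* a = table->BoxLambda(closure_method);
    mirror::Object* b = table->BoxLambda(closure_method);
    DCHECK_EQ(a, b);  // repeated box-lambda yields the same boxed object
    ArtMethod* unboxed = nullptr;
    CHECK(table->UnboxLambda(a, &unboxed));
    DCHECK_EQ(unboxed, closure_method);  // unbox-lambda recovers the closure
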
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 36adbea..36e3aa3 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -766,7 +766,7 @@
   return native_method;
 }
 
-void JavaVMExt::SweepJniWeakGlobals(IsMarkedCallback* callback, void* arg) {
+void JavaVMExt::SweepJniWeakGlobals(IsMarkedVisitor* visitor) {
   MutexLock mu(Thread::Current(), weak_globals_lock_);
   Runtime* const runtime = Runtime::Current();
   for (auto* entry : weak_globals_) {
@@ -774,7 +774,7 @@
     if (!entry->IsNull()) {
       // Since this is called by the GC, we don't need a read barrier.
       mirror::Object* obj = entry->Read<kWithoutReadBarrier>();
-      mirror::Object* new_obj = callback(obj, arg);
+      mirror::Object* new_obj = visitor->IsMarked(obj);
       if (new_obj == nullptr) {
         new_obj = runtime->GetClearedJniWeakGlobal();
       }
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 694a545..97fbbc5 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -120,7 +120,7 @@
 
   void DeleteWeakGlobalRef(Thread* self, jweak obj);
 
-  void SweepJniWeakGlobals(IsMarkedCallback* callback, void* arg)
+  void SweepJniWeakGlobals(IsMarkedVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::Object* DecodeGlobal(Thread* self, IndirectRef ref)
diff --git a/runtime/lambda/box_table.cc b/runtime/lambda/box_table.cc
new file mode 100644
index 0000000..64a6076
--- /dev/null
+++ b/runtime/lambda/box_table.cc
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "lambda/box_table.h"
+
+#include "base/mutex.h"
+#include "common_throws.h"
+#include "gc_root-inl.h"
+#include "mirror/method.h"
+#include "mirror/object-inl.h"
+#include "thread.h"
+
+#include <vector>
+
+namespace art {
+namespace lambda {
+
+BoxTable::BoxTable()
+  : allow_new_weaks_(true),
+    new_weaks_condition_("lambda box table allowed weaks", *Locks::lambda_table_lock_) {}
+
+mirror::Object* BoxTable::BoxLambda(const ClosureType& closure) {
+  Thread* self = Thread::Current();
+
+  {
+    // TODO: Switch to ReaderMutexLock if ConditionVariable ever supports RW Mutexes
+    /*Reader*/MutexLock mu(self, *Locks::lambda_table_lock_);
+    BlockUntilWeaksAllowed();
+
+    // Attempt to look up this object; it may have been boxed previously.
+    // If this is the case we *must* return the same object as before to maintain
+    // referential equality.
+    //
+    // In managed code:
+    //   Functional f = () -> 5;  // vF = create-lambda
+    //   Object a = f;            // vA = box-lambda vF
+    //   Object b = f;            // vB = box-lambda vF
+    //   assert(a == b)
+    ValueType value = FindBoxedLambda(closure);
+    if (!value.IsNull()) {
+      return value.Read();
+    }
+
+    // Otherwise we need to box the closure ourselves and insert it into the hash map.
+  }
+
+  // Release the lambda table lock here, so that thread suspension is allowed.
+
+  // Convert the ArtMethod into a java.lang.reflect.Method which will serve
+  // as the temporary 'boxed' version of the lambda. This is good enough
+  // to check all the basic object identities that a boxed lambda must retain.
+
+  // TODO: Boxing an innate lambda (i.e. made with create-lambda) should make a proxy class
+  // TODO: Boxing a learned lambda (i.e. made with unbox-lambda) should return the original object
+  mirror::Method* method_as_object =
+      mirror::Method::CreateFromArtMethod(self, closure);
+  // There are no thread suspension points after this, so we don't need to put it into a handle.
+
+  if (UNLIKELY(method_as_object == nullptr)) {
+    // Most likely an OOM has occurred.
+    CHECK(self->IsExceptionPending());
+    return nullptr;
+  }
+
+  // The method has been successfully boxed into an object, now insert it into the hash map.
+  {
+    MutexLock mu(self, *Locks::lambda_table_lock_);
+    BlockUntilWeaksAllowed();
+
+    // Lookup the object again, it's possible another thread already boxed it while
+    // we were allocating the object before.
+    ValueType value = FindBoxedLambda(closure);
+    if (UNLIKELY(!value.IsNull())) {
+      // Let the GC clean up method_as_object at a later time.
+      return value.Read();
+    }
+
+    // Otherwise we should insert it into the hash map in this thread.
+    map_.Insert(std::make_pair(closure, ValueType(method_as_object)));
+  }
+
+  return method_as_object;
+}
+
+bool BoxTable::UnboxLambda(mirror::Object* object, ClosureType* out_closure) {
+  DCHECK(object != nullptr);
+  *out_closure = nullptr;
+
+  // Note that we do not need to access lambda_table_lock_ here
+  // since we don't need to look at the map.
+
+  mirror::Object* boxed_closure_object = object;
+
+  // Raise ClassCastException if object is not instanceof java.lang.reflect.Method
+  if (UNLIKELY(!boxed_closure_object->InstanceOf(mirror::Method::StaticClass()))) {
+    ThrowClassCastException(mirror::Method::StaticClass(), boxed_closure_object->GetClass());
+    return false;
+  }
+
+  // TODO(iam): We must check that the closure object extends/implements the type
+  // specified in [type id]. This is not currently implemented since it's always a Method.
+
+  // If we got this far, the inputs are valid.
+  // Write out the java.lang.reflect.Method's embedded ArtMethod* into the vreg target.
+  mirror::AbstractMethod* boxed_closure_as_method =
+      down_cast<mirror::AbstractMethod*>(boxed_closure_object);
+
+  ArtMethod* unboxed_closure = boxed_closure_as_method->GetArtMethod();
+  DCHECK(unboxed_closure != nullptr);
+
+  *out_closure = unboxed_closure;
+  return true;
+}
+
+BoxTable::ValueType BoxTable::FindBoxedLambda(const ClosureType& closure) const {
+  auto map_iterator = map_.Find(closure);
+  if (map_iterator != map_.end()) {
+    const std::pair<ClosureType, ValueType>& key_value_pair = *map_iterator;
+    const ValueType& value = key_value_pair.second;
+
+    DCHECK(!value.IsNull());  // Never store null boxes.
+    return value;
+  }
+
+  return ValueType(nullptr);
+}
+
+void BoxTable::BlockUntilWeaksAllowed() {
+  Thread* self = Thread::Current();
+  while (UNLIKELY(allow_new_weaks_ == false)) {
+    new_weaks_condition_.WaitHoldingLocks(self);  // wait while holding mutator lock
+  }
+}
+
+void BoxTable::SweepWeakBoxedLambdas(IsMarkedVisitor* visitor) {
+  DCHECK(visitor != nullptr);
+
+  Thread* self = Thread::Current();
+  MutexLock mu(self, *Locks::lambda_table_lock_);
+
+  /*
+   * Visit every weak root in our lambda box table.
+   * Remove unmarked objects, update marked objects to new address.
+   */
+  std::vector<ClosureType> remove_list;
+  for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) {
+    std::pair<ClosureType, ValueType>& key_value_pair = *map_iterator;
+
+    const ValueType& old_value = key_value_pair.second;
+
+    // This does not need a read barrier because this is called by GC.
+    mirror::Object* old_value_raw = old_value.Read<kWithoutReadBarrier>();
+    mirror::Object* new_value = visitor->IsMarked(old_value_raw);
+
+    if (new_value == nullptr) {
+      const ClosureType& closure = key_value_pair.first;
+      // The object has been swept away.
+      // Delete the entry from the map.
+      map_iterator = map_.Erase(map_.Find(closure));
+    } else {
+      // The object has been moved.
+      // Update the map.
+      key_value_pair.second = ValueType(new_value);
+      ++map_iterator;
+    }
+  }
+
+  // Occasionally shrink the map to avoid growing very large.
+  if (map_.CalculateLoadFactor() < kMinimumLoadFactor) {
+    map_.ShrinkToMaximumLoad();
+  }
+}
+
+void BoxTable::DisallowNewWeakBoxedLambdas() {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, *Locks::lambda_table_lock_);
+
+  allow_new_weaks_ = false;
+}
+
+void BoxTable::AllowNewWeakBoxedLambdas() {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, *Locks::lambda_table_lock_);
+
+  allow_new_weaks_ = true;
+  new_weaks_condition_.Broadcast(self);
+}
+
+void BoxTable::EnsureNewWeakBoxedLambdasDisallowed() {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, *Locks::lambda_table_lock_);
+  CHECK_EQ(allow_new_weaks_, false);
+}
+
+bool BoxTable::EqualsFn::operator()(const ClosureType& lhs, const ClosureType& rhs) const {
+  // Nothing needs this right now, but leave this assertion for later when
+  // we need to look at the references inside of the closure.
+  if (kIsDebugBuild) {
+    Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+  }
+
+  // TODO: Need rework to use read barriers once closures have references inside of them that can
+  // move. Until then, it's safe to just compare the data inside of it directly.
+  return lhs == rhs;
+}
+
+}  // namespace lambda
+}  // namespace art
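
BoxLambda above is a double-checked insert: look up under lambda_table_lock_, release the lock to allocate (Method::CreateFromArtMethod can suspend the thread, so the lock must not be held), then re-lock and look up again, discarding the fresh allocation if another thread won the race. The same pattern in a generic, self-contained form; std::mutex and std::map here stand in for the ART Mutex and art::HashMap, and the real code additionally waits for weak access:

    #include <map>
    #include <mutex>

    template <typename K, typename V, typename MakeValue>
    V GetOrCreate(std::mutex& mu, std::map<K, V>& map, const K& key, MakeValue make) {
      {
        std::lock_guard<std::mutex> lock(mu);
        auto it = map.find(key);
        if (it != map.end()) {
          return it->second;        // fast path: already present
        }
      }
      V fresh = make();             // slow path: allocate with no lock held
      std::lock_guard<std::mutex> lock(mu);
      auto it = map.find(key);
      if (it != map.end()) {
        return it->second;          // another thread inserted first; drop 'fresh'
      }
      map.emplace(key, fresh);
      return fresh;
    }
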
diff --git a/runtime/lambda/box_table.h b/runtime/lambda/box_table.h
new file mode 100644
index 0000000..12d3ff3
--- /dev/null
+++ b/runtime/lambda/box_table.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ART_RUNTIME_LAMBDA_BOX_TABLE_H_
+#define ART_RUNTIME_LAMBDA_BOX_TABLE_H_
+
+#include "base/allocator.h"
+#include "base/hash_map.h"
+#include "gc_root.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "object_callbacks.h"
+
+#include <stdint.h>
+
+namespace art {
+
+class ArtMethod;  // forward declaration
+
+namespace mirror {
+class Object;  // forward declaration
+}  // namespace mirror
+
+namespace lambda {
+
+/*
+ * Store a table of boxed lambdas. This is required to maintain object referential equality
+ * when a lambda is re-boxed.
+ *
+ * Conceptually, we store a mapping of Closures -> Weak Reference<Boxed Lambda Object>.
+ * When too many objects get GCd, we shrink the underlying table to use less space.
+ */
+class BoxTable FINAL {
+ public:
+  using ClosureType = art::ArtMethod*;
+
+  // Boxes a closure into an object. Returns null and throws an exception on failure.
+  mirror::Object* BoxLambda(const ClosureType& closure)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::lambda_table_lock_);
+
+  // Unboxes an object back into the lambda. Returns false and throws an exception on failure.
+  bool UnboxLambda(mirror::Object* object, ClosureType* out_closure)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Sweep weak references to lambda boxes. Update the addresses if the objects have been
+  // moved, and delete them from the table if the objects have been cleaned up.
+  void SweepWeakBoxedLambdas(IsMarkedVisitor* visitor)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::lambda_table_lock_);
+
+  // GC callback: Temporarily block anyone from touching the map.
+  void DisallowNewWeakBoxedLambdas()
+      LOCKS_EXCLUDED(Locks::lambda_table_lock_);
+
+  // GC callback: Unblock any readers who have been queued waiting to touch the map.
+  void AllowNewWeakBoxedLambdas()
+      LOCKS_EXCLUDED(Locks::lambda_table_lock_);
+
+  // GC callback: Verify that the state is now blocking anyone from touching the map.
+  void EnsureNewWeakBoxedLambdasDisallowed()
+      LOCKS_EXCLUDED(Locks::lambda_table_lock_);
+
+  BoxTable();
+  ~BoxTable() = default;
+
+ private:
+  // Note: This needs to be a GcRoot (and it is one, below).
+  // Explanation:
+  // - After all threads are suspended (exclusive mutator lock),
+  //   the concurrent-copying GC can move objects from the "from" space to the "to" space.
+  // If an object is moved at that time and *before* SweepSystemWeaks is called, then
+  // we don't know whether the move has happened yet.
+  // Successive reads will then (incorrectly) look at the objects in the "from" space,
+  // which is a problem since the objects have already been forwarded and mutations
+  // would not be visible in the right space.
+  // Instead, use a GcRoot here which will be automatically updated by the GC.
+  //
+  // Also, any reads should be protected by a read barrier to always give us the "to" space address.
+  using ValueType = GcRoot<mirror::Object>;
+
+  // Attempt to look up the lambda in the map, or return null if it's not there yet.
+  ValueType FindBoxedLambda(const ClosureType& closure) const
+      SHARED_LOCKS_REQUIRED(Locks::lambda_table_lock_);
+
+  // If the GC has come in and temporarily disallowed touching weaks, block until it is allowed.
+  void BlockUntilWeaksAllowed()
+      SHARED_LOCKS_REQUIRED(Locks::lambda_table_lock_);
+
+  // EmptyFn implementation for art::HashMap
+  struct EmptyFn {
+    void MakeEmpty(std::pair<ClosureType, ValueType>& item) const {
+      item.first = nullptr;
+    }
+    bool IsEmpty(const std::pair<ClosureType, ValueType>& item) const {
+      return item.first == nullptr;
+    }
+  };
+
+  // HashFn implementation for art::HashMap
+  struct HashFn {
+    size_t operator()(const ClosureType& key) const {
+      // TODO(iam): Rewrite hash function when ClosureType is no longer an ArtMethod*
+      return static_cast<size_t>(reinterpret_cast<uintptr_t>(key));
+    }
+  };
+
+  // EqualsFn implementation for art::HashMap
+  struct EqualsFn {
+    bool operator()(const ClosureType& lhs, const ClosureType& rhs) const;
+  };
+
+  using UnorderedMap = art::HashMap<ClosureType,
+                                    ValueType,
+                                    EmptyFn,
+                                    HashFn,
+                                    EqualsFn,
+                                    TrackingAllocator<std::pair<ClosureType, ValueType>,
+                                                      kAllocatorTagLambdaBoxTable>>;
+
+  UnorderedMap map_                                          GUARDED_BY(Locks::lambda_table_lock_);
+  bool allow_new_weaks_                                      GUARDED_BY(Locks::lambda_table_lock_);
+  ConditionVariable new_weaks_condition_                     GUARDED_BY(Locks::lambda_table_lock_);
+
+  // Shrink the map when we get below this load factor.
+  // (This is an arbitrary value that should be large enough to prevent aggressive map erases
+  // from shrinking the table too often.)
+  static constexpr double kMinimumLoadFactor = UnorderedMap::kDefaultMinLoadFactor / 2;
+
+  DISALLOW_COPY_AND_ASSIGN(BoxTable);
+};
+
+}  // namespace lambda
+}  // namespace art
+
+#endif  // ART_RUNTIME_LAMBDA_BOX_TABLE_H_
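
One design note on the map: art::HashMap stores entries inline in its backing array, so EmptyFn must be able to mark a slot empty using the key alone. That works today because ClosureType is a raw ArtMethod* and nullptr is never a valid closure; the TODOs acknowledge that both HashFn and this empty encoding need rethinking once closures carry captured state. Illustrative composition of the pieces, using the names from the header above:

    // UnorderedMap map_;
    // map_.Insert(std::make_pair(closure, ValueType(boxed_object)));
    // auto it = map_.Find(closure);        // uses HashFn + EqualsFn
    // if (it != map_.end()) { /* hit */ }  // ValueType is a GcRoot, so reads
    //                                      // go through a read barrier.
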
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index a290575..245f8b8 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -118,7 +118,7 @@
   }
 
   static LockWord FromForwardingAddress(size_t target) {
-    DCHECK(IsAligned < 1 << kStateSize>(target));
+    DCHECK_ALIGNED(target, (1 << kStateSize));
     return LockWord((target >> kStateSize) | (kStateForwardingAddress << kStateShift));
   }
 
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index dbae7f8..8df8f96 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -585,10 +585,10 @@
   DCHECK_GE(new_end, Begin());
   DCHECK_LE(new_end, End());
   DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_);
-  DCHECK(IsAligned<kPageSize>(begin_));
-  DCHECK(IsAligned<kPageSize>(base_begin_));
-  DCHECK(IsAligned<kPageSize>(reinterpret_cast<uint8_t*>(base_begin_) + base_size_));
-  DCHECK(IsAligned<kPageSize>(new_end));
+  DCHECK_ALIGNED(begin_, kPageSize);
+  DCHECK_ALIGNED(base_begin_, kPageSize);
+  DCHECK_ALIGNED(reinterpret_cast<uint8_t*>(base_begin_) + base_size_, kPageSize);
+  DCHECK_ALIGNED(new_end, kPageSize);
   uint8_t* old_end = begin_ + size_;
   uint8_t* old_base_end = reinterpret_cast<uint8_t*>(base_begin_) + base_size_;
   uint8_t* new_base_end = new_end;
@@ -603,7 +603,7 @@
   uint8_t* tail_base_begin = new_base_end;
   size_t tail_base_size = old_base_end - new_base_end;
   DCHECK_EQ(tail_base_begin + tail_base_size, old_base_end);
-  DCHECK(IsAligned<kPageSize>(tail_base_size));
+  DCHECK_ALIGNED(tail_base_size, kPageSize);
 
 #ifdef USE_ASHMEM
   // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are
@@ -726,7 +726,7 @@
     size_t num_gaps = 0;
     size_t num = 1u;
     size_t size = map->BaseSize();
-    CHECK(IsAligned<kPageSize>(size));
+    CHECK_ALIGNED(size, kPageSize);
     void* end = map->BaseEnd();
     while (it != maps_end &&
         it->second->GetProtect() == map->GetProtect() &&
@@ -740,12 +740,12 @@
         }
         size_t gap =
             reinterpret_cast<uintptr_t>(it->second->BaseBegin()) - reinterpret_cast<uintptr_t>(end);
-        CHECK(IsAligned<kPageSize>(gap));
+        CHECK_ALIGNED(gap, kPageSize);
         os << "~0x" << std::hex << (gap / kPageSize) << "P";
         num = 0u;
         size = 0u;
       }
-      CHECK(IsAligned<kPageSize>(it->second->BaseSize()));
+      CHECK_ALIGNED(it->second->BaseSize(), kPageSize);
       ++num;
       size += it->second->BaseSize();
       end = it->second->BaseEnd();
diff --git a/runtime/mirror/abstract_method.h b/runtime/mirror/abstract_method.h
index 99d697a..6240b3b 100644
--- a/runtime/mirror/abstract_method.h
+++ b/runtime/mirror/abstract_method.h
@@ -60,9 +60,8 @@
 
   HeapReference<mirror::Class> declaring_class_;
   HeapReference<mirror::Class> declaring_class_of_overridden_method_;
-  uint32_t padding_;
-  uint64_t art_method_;
   uint32_t access_flags_;
+  uint64_t art_method_;
   uint32_t dex_method_index_;
 
   friend struct art::AbstractMethodOffsets;  // for verifying offset information
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index bc89890..fd9c1b1 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -1163,7 +1163,7 @@
   list_.push_front(m);
 }
 
-void MonitorList::SweepMonitorList(IsMarkedCallback* callback, void* arg) {
+void MonitorList::SweepMonitorList(IsMarkedVisitor* visitor) {
   Thread* self = Thread::Current();
   MutexLock mu(self, monitor_list_lock_);
   for (auto it = list_.begin(); it != list_.end(); ) {
@@ -1171,7 +1171,7 @@
     // Disable the read barrier in GetObject() as this is called by GC.
     mirror::Object* obj = m->GetObject<kWithoutReadBarrier>();
     // The object of a monitor can be null if we have deflated it.
-    mirror::Object* new_obj = obj != nullptr ? callback(obj, arg) : nullptr;
+    mirror::Object* new_obj = obj != nullptr ? visitor->IsMarked(obj) : nullptr;
     if (new_obj == nullptr) {
       VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object "
                     << obj;
@@ -1184,29 +1184,30 @@
   }
 }
 
-struct MonitorDeflateArgs {
-  MonitorDeflateArgs() : self(Thread::Current()), deflate_count(0) {}
-  Thread* const self;
-  size_t deflate_count;
+class MonitorDeflateVisitor : public IsMarkedVisitor {
+ public:
+  MonitorDeflateVisitor() : self_(Thread::Current()), deflate_count_(0) {}
+
+  virtual mirror::Object* IsMarked(mirror::Object* object) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (Monitor::Deflate(self_, object)) {
+      DCHECK_NE(object->GetLockWord(true).GetState(), LockWord::kFatLocked);
+      ++deflate_count_;
+      // If we deflated, return null so that the monitor gets removed from the array.
+      return nullptr;
+    }
+    return object;  // Monitor was not deflated.
+  }
+
+  Thread* const self_;
+  size_t deflate_count_;
 };
 
-static mirror::Object* MonitorDeflateCallback(mirror::Object* object, void* arg)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  MonitorDeflateArgs* args = reinterpret_cast<MonitorDeflateArgs*>(arg);
-  if (Monitor::Deflate(args->self, object)) {
-    DCHECK_NE(object->GetLockWord(true).GetState(), LockWord::kFatLocked);
-    ++args->deflate_count;
-    // If we deflated, return null so that the monitor gets removed from the array.
-    return nullptr;
-  }
-  return object;  // Monitor was not deflated.
-}
-
 size_t MonitorList::DeflateMonitors() {
-  MonitorDeflateArgs args;
-  Locks::mutator_lock_->AssertExclusiveHeld(args.self);
-  SweepMonitorList(MonitorDeflateCallback, &args);
-  return args.deflate_count;
+  MonitorDeflateVisitor visitor;
+  Locks::mutator_lock_->AssertExclusiveHeld(visitor.self_);
+  SweepMonitorList(&visitor);
+  return visitor.deflate_count_;
 }
 
 MonitorInfo::MonitorInfo(mirror::Object* obj) : owner_(nullptr), entry_count_(0) {
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 8f6fb75..09a6cb6 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -287,7 +287,7 @@
 
   void Add(Monitor* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SweepMonitorList(IsMarkedCallback* callback, void* arg)
+  void SweepMonitorList(IsMarkedVisitor* visitor)
       LOCKS_EXCLUDED(monitor_list_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DisallowNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_);
   void AllowNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_);
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index 736b42b..97aae67 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -105,15 +105,21 @@
         dstArray->AsShortSizedArray()->Memmove(dstPos, srcArray->AsShortSizedArray(), srcPos, count);
         return;
       case Primitive::kPrimInt:
-      case Primitive::kPrimFloat:
         DCHECK_EQ(Primitive::ComponentSize(dstComponentPrimitiveType), 4U);
         dstArray->AsIntArray()->Memmove(dstPos, srcArray->AsIntArray(), srcPos, count);
         return;
+      case Primitive::kPrimFloat:
+        DCHECK_EQ(Primitive::ComponentSize(dstComponentPrimitiveType), 4U);
+        dstArray->AsFloatArray()->Memmove(dstPos, srcArray->AsFloatArray(), srcPos, count);
+        return;
       case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
         DCHECK_EQ(Primitive::ComponentSize(dstComponentPrimitiveType), 8U);
         dstArray->AsLongArray()->Memmove(dstPos, srcArray->AsLongArray(), srcPos, count);
         return;
+      case Primitive::kPrimDouble:
+        DCHECK_EQ(Primitive::ComponentSize(dstComponentPrimitiveType), 8U);
+        dstArray->AsDoubleArray()->Memmove(dstPos, srcArray->AsDoubleArray(), srcPos, count);
+        return;
       case Primitive::kPrimNot: {
         mirror::ObjectArray<mirror::Object>* dstObjArray = dstArray->AsObjectArray<mirror::Object>();
         mirror::ObjectArray<mirror::Object>* srcObjArray = srcArray->AsObjectArray<mirror::Object>();
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index 6569d83..b40d94a 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -90,6 +90,7 @@
     case kWaitingInMainSignalCatcherLoop: return kJavaWaiting;
     case kWaitingForMethodTracingStart:   return kJavaWaiting;
     case kWaitingForVisitObjects:         return kJavaWaiting;
+    case kWaitingWeakRootRead:            return kJavaWaiting;
     case kSuspended:                      return kJavaRunnable;
     // Don't add a 'default' here so the compiler can spot incompatible enum changes.
   }
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 1dd2aad..5725b6f 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -97,7 +97,7 @@
   image_file_location_oat_checksum_ = image_file_location_oat_checksum;
   UpdateChecksum(&image_file_location_oat_checksum_, sizeof(image_file_location_oat_checksum_));
 
-  CHECK(IsAligned<kPageSize>(image_file_location_oat_data_begin));
+  CHECK_ALIGNED(image_file_location_oat_data_begin, kPageSize);
   image_file_location_oat_data_begin_ = image_file_location_oat_data_begin;
   UpdateChecksum(&image_file_location_oat_data_begin_, sizeof(image_file_location_oat_data_begin_));
 
diff --git a/runtime/oat.h b/runtime/oat.h
index 3451d0f..ee2f3f6 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '6', '6', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '6', '7', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file-inl.h b/runtime/oat_file-inl.h
index 6b3b666..5df6525 100644
--- a/runtime/oat_file-inl.h
+++ b/runtime/oat_file-inl.h
@@ -22,7 +22,7 @@
 namespace art {
 
 inline const OatQuickMethodHeader* OatFile::OatMethod::GetOatQuickMethodHeader() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetQuickCode());
+  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
     return nullptr;
   }
@@ -38,14 +38,6 @@
   return reinterpret_cast<const uint8_t*>(method_header) - begin_;
 }
 
-inline uint32_t OatFile::OatMethod::GetQuickCodeSize() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetQuickCode());
-  if (code == nullptr) {
-    return 0u;
-  }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
-}
-
 inline uint32_t OatFile::OatMethod::GetQuickCodeSizeOffset() const {
   const OatQuickMethodHeader* method_header = GetOatQuickMethodHeader();
   if (method_header == nullptr) {
@@ -78,8 +70,8 @@
   return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.FpSpillMask();
 }
 
-const uint8_t* OatFile::OatMethod::GetGcMap() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetQuickCode());
+inline const uint8_t* OatFile::OatMethod::GetGcMap() const {
+  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
     return nullptr;
   }
@@ -90,12 +82,12 @@
   return reinterpret_cast<const uint8_t*>(code) - offset;
 }
 
-uint32_t OatFile::OatMethod::GetGcMapOffset() const {
+inline uint32_t OatFile::OatMethod::GetGcMapOffset() const {
   const uint8_t* gc_map = GetGcMap();
   return static_cast<uint32_t>(gc_map != nullptr ? gc_map - begin_ : 0u);
 }
 
-uint32_t OatFile::OatMethod::GetGcMapOffsetOffset() const {
+inline uint32_t OatFile::OatMethod::GetGcMapOffsetOffset() const {
   const OatQuickMethodHeader* method_header = GetOatQuickMethodHeader();
   if (method_header == nullptr) {
     return 0u;
@@ -130,7 +122,7 @@
 }
 
 inline const uint8_t* OatFile::OatMethod::GetMappingTable() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetQuickCode());
+  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
     return nullptr;
   }
@@ -142,7 +134,7 @@
 }
 
 inline const uint8_t* OatFile::OatMethod::GetVmapTable() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetQuickCode());
+  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
     return nullptr;
   }
@@ -153,6 +145,22 @@
   return reinterpret_cast<const uint8_t*>(code) - offset;
 }
 
+inline uint32_t OatFile::OatMethod::GetQuickCodeSize() const {
+  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
+  if (code == nullptr) {
+    return 0u;
+  }
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+}
+
+inline uint32_t OatFile::OatMethod::GetCodeOffset() const {
+  return (GetQuickCodeSize() == 0) ? 0 : code_offset_;
+}
+
+inline const void* OatFile::OatMethod::GetQuickCode() const {
+  return GetOatPointer<const void*>(GetCodeOffset());
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_OAT_FILE_INL_H_
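
The refactoring above has a subtle ordering constraint: GetQuickCode() now routes through GetCodeOffset(), which in turn consults GetQuickCodeSize(), so every other accessor reads the raw code_offset_ directly. A comment-form sketch of the resulting call chain (all names as defined in this file):

    // GetQuickCode()      -> GetOatPointer<const void*>(GetCodeOffset())
    // GetCodeOffset()     -> 0 if GetQuickCodeSize() == 0, else code_offset_
    // GetQuickCodeSize()  -> reads the OatQuickMethodHeader located just before
    //                        the code pointed to by the raw code_offset_
    //
    // Calling GetQuickCode() from inside GetQuickCodeSize() would therefore
    // recurse forever; hence the GetOatPointer<const void*>(code_offset_)
    // reads in the helpers above.
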
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index ad5741e..098fe61 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -41,6 +41,7 @@
 #include "mem_map.h"
 #include "mirror/class.h"
 #include "mirror/object-inl.h"
+#include "oat_file-inl.h"
 #include "os.h"
 #include "runtime.h"
 #include "utils.h"
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 1a782de..7c4ef8b 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -100,13 +100,9 @@
    public:
     void LinkMethod(ArtMethod* method) const;
 
-    uint32_t GetCodeOffset() const {
-      return code_offset_;
-    }
+    uint32_t GetCodeOffset() const;
 
-    const void* GetQuickCode() const {
-      return GetOatPointer<const void*>(code_offset_);
-    }
+    const void* GetQuickCode() const;
 
     // Returns size of quick code.
     uint32_t GetQuickCodeSize() const;
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index df0cf45..29b879e 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -165,6 +165,11 @@
 }
 
 std::unique_ptr<OatFile> OatFileAssistant::GetBestOatFile() {
+  // The best oat files are, in descending order of preference:
+  // 1. Properly relocated files. These may be opened executable.
+  // 2. Files that are not out of date and are already opened non-executable.
+  // 3. Files that are not out of date that we must reopen non-executable.
+
   if (OatFileIsUpToDate()) {
     oat_file_released_ = true;
     return std::move(cached_oat_file_);
@@ -175,26 +180,36 @@
     return std::move(cached_odex_file_);
   }
 
-  if (load_executable_) {
-    VLOG(oat) << "Oat File Assistant: No relocated oat file found,"
-      << " attempting to fall back to interpreting oat file instead.";
+  VLOG(oat) << "Oat File Assistant: No relocated oat file found,"
+    << " attempting to fall back to interpreting oat file instead.";
 
+  if (!OatFileIsOutOfDate() && !OatFileIsExecutable()) {
+    oat_file_released_ = true;
+    return std::move(cached_oat_file_);
+  }
+
+  if (!OdexFileIsOutOfDate() && !OdexFileIsExecutable()) {
+    oat_file_released_ = true;
+    return std::move(cached_odex_file_);
+  }
+
+  if (!OatFileIsOutOfDate()) {
+    load_executable_ = false;
+    ClearOatFileCache();
     if (!OatFileIsOutOfDate()) {
-      load_executable_ = false;
-      ClearOatFileCache();
-      if (!OatFileIsOutOfDate()) {
-        oat_file_released_ = true;
-        return std::move(cached_oat_file_);
-      }
+      CHECK(!OatFileIsExecutable());
+      oat_file_released_ = true;
+      return std::move(cached_oat_file_);
     }
+  }
 
+  if (!OdexFileIsOutOfDate()) {
+    load_executable_ = false;
+    ClearOdexFileCache();
     if (!OdexFileIsOutOfDate()) {
-      load_executable_ = false;
-      ClearOdexFileCache();
-      if (!OdexFileIsOutOfDate()) {
-        oat_file_released_ = true;
-        return std::move(cached_odex_file_);
-      }
+      CHECK(!OdexFileIsExecutable());
+      oat_file_released_ = true;
+      return std::move(cached_odex_file_);
     }
   }
 
@@ -868,6 +883,11 @@
   return cached_odex_file_.get();
 }
 
+bool OatFileAssistant::OdexFileIsExecutable() {
+  const OatFile* odex_file = GetOdexFile();
+  return (odex_file != nullptr && odex_file->IsExecutable());
+}
+
 void OatFileAssistant::ClearOdexFileCache() {
   odex_file_load_attempted_ = false;
   cached_odex_file_.reset();
@@ -894,6 +914,11 @@
   return cached_oat_file_.get();
 }
 
+bool OatFileAssistant::OatFileIsExecutable() {
+  const OatFile* oat_file = GetOatFile();
+  return (oat_file != nullptr && oat_file->IsExecutable());
+}
+
 void OatFileAssistant::ClearOatFileCache() {
   oat_file_load_attempted_ = false;
   cached_oat_file_.reset();
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index 7216fc7..664db98 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -327,6 +327,9 @@
   // The caller shouldn't clean up or free the returned pointer.
   const OatFile* GetOdexFile();
 
+  // Returns true if the odex file is opened executable.
+  bool OdexFileIsExecutable();
+
   // Clear any cached information about the odex file that depends on the
   // contents of the file.
   void ClearOdexFileCache();
@@ -336,6 +339,9 @@
   // The caller shouldn't clean up or free the returned pointer.
   const OatFile* GetOatFile();
 
+  // Returns true if the oat file is opened executable.
+  bool OatFileIsExecutable();
+
   // Clear any cached information about the oat file that depends on the
   // contents of the file.
   void ClearOatFileCache();
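
A hedged usage sketch of the new fallback behavior follows; the constructor arguments and local variables are illustrative, while GetBestOatFile(), IsExecutable(), GetLocation(), and VLOG(oat) are taken from this change or the surrounding code:

    // Hypothetical caller: prefer a properly relocated, executable oat file,
    // but fall back to a non-executable (interpreted) one rather than falling
    // all the way back to the raw dex file.
    OatFileAssistant assistant(dex_location, kRuntimeISA, /*load_executable*/ true);
    std::unique_ptr<OatFile> best = assistant.GetBestOatFile();
    if (best != nullptr && !best->IsExecutable()) {
      // Cases 2 and 3 from the comment in GetBestOatFile(): the file is not
      // out of date, so it can still back the interpreter.
      VLOG(oat) << "Interpreting from " << best->GetLocation();
    }
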
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 570c59c..d133fa3 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -446,6 +446,27 @@
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
+// Some tests very occasionally fail: we expect to have an unrelocated non-pic
+// odex file that is reported as needing relocation, but it is instead
+// reported as being up to date (b/22599792).
+//
+// This function adds extra checks for diagnosing why the given oat file is
+// reported as up to date when it should be non-pic and in need of relocation.
+// These extra diagnostic checks should be removed once b/22599792 has been
+// resolved.
+static void DiagnoseFlakyTestFailure(const OatFile& oat_file) {
+  Runtime* runtime = Runtime::Current();
+  const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace();
+  ASSERT_TRUE(image_space != nullptr);
+  const ImageHeader& image_header = image_space->GetImageHeader();
+  const OatHeader& oat_header = oat_file.GetOatHeader();
+  EXPECT_FALSE(oat_file.IsPic());
+  EXPECT_EQ(image_header.GetOatChecksum(), oat_header.GetImageFileLocationOatChecksum());
+  EXPECT_NE(reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()),
+      oat_header.GetImageFileLocationOatDataBegin());
+  EXPECT_NE(image_header.GetPatchDelta(), oat_header.GetImagePatchDelta());
+}
+
 // Case: We have a DEX file and an ODEX file, but no OAT file.
 // Expect: The status is kPatchOatNeeded.
 TEST_F(OatFileAssistantTest, DexOdexNoOat) {
@@ -470,6 +491,12 @@
   EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
   EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
+
+  // We should still be able to get the non-executable odex file to run from.
+  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
+  ASSERT_TRUE(oat_file.get() != nullptr);
+
+  DiagnoseFlakyTestFailure(*oat_file);
 }
 
 // Case: We have a stripped DEX file and an ODEX file, but no OAT file.
@@ -712,17 +739,7 @@
   dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str());
   EXPECT_EQ(1u, dex_files.size());
 
-  // Add some extra checks to help diagnose apparently flaky test failures.
-  Runtime* runtime = Runtime::Current();
-  const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace();
-  ASSERT_TRUE(image_space != nullptr);
-  const ImageHeader& image_header = image_space->GetImageHeader();
-  const OatHeader& oat_header = oat_file->GetOatHeader();
-  EXPECT_FALSE(oat_file->IsPic());
-  EXPECT_EQ(image_header.GetOatChecksum(), oat_header.GetImageFileLocationOatChecksum());
-  EXPECT_NE(reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()),
-      oat_header.GetImageFileLocationOatDataBegin());
-  EXPECT_NE(image_header.GetPatchDelta(), oat_header.GetImagePatchDelta());
+  DiagnoseFlakyTestFailure(*oat_file);
 }
 
 // Case: We have a DEX file and a PIC ODEX file, but no OAT file.
diff --git a/runtime/object_callbacks.h b/runtime/object_callbacks.h
index df34ce7..4d726ec 100644
--- a/runtime/object_callbacks.h
+++ b/runtime/object_callbacks.h
@@ -21,31 +21,30 @@
 
 namespace art {
 namespace mirror {
-  class Class;
   class Object;
   template<class MirrorType> class HeapReference;
-  class Reference;
 }  // namespace mirror
-class StackVisitor;
 
 // A callback for visiting an object in the heap.
 typedef void (ObjectCallback)(mirror::Object* obj, void* arg);
-// A callback used for marking an object, returns the new address of the object if the object moved.
-typedef mirror::Object* (MarkObjectCallback)(mirror::Object* obj, void* arg) WARN_UNUSED;
 
-typedef void (MarkHeapReferenceCallback)(mirror::HeapReference<mirror::Object>* ref, void* arg);
-typedef void (DelayReferenceReferentCallback)(mirror::Class* klass, mirror::Reference* ref,
-    void* arg);
+class IsMarkedVisitor {
+ public:
+  virtual ~IsMarkedVisitor() {}
+  // Returns null if an object is not marked; otherwise returns the new address of that object.
+  // May return the same address as the input if the object did not move.
+  virtual mirror::Object* IsMarked(mirror::Object* obj) = 0;
+};
 
-// A callback for testing if an object is marked, returns null if not marked, otherwise the new
-// address the object (if the object didn't move, returns the object input parameter).
-typedef mirror::Object* (IsMarkedCallback)(mirror::Object* object, void* arg) WARN_UNUSED;
-
-// Returns true if the object in the heap reference is marked, if it is marked and has moved the
-// callback updates the heap reference contain the new value.
-typedef bool (IsHeapReferenceMarkedCallback)(mirror::HeapReference<mirror::Object>* object,
-    void* arg) WARN_UNUSED;
-typedef void (ProcessMarkStackCallback)(void* arg);
+class MarkObjectVisitor {
+ public:
+  virtual ~MarkObjectVisitor() {}
+  // Mark an object and return the new address of an object.
+  // May return the same address as the input if the object did not move.
+  virtual mirror::Object* MarkObject(mirror::Object* obj) = 0;
+  // Mark an object and update the value stored in the heap reference if the object moved.
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj) = 0;
+};
 
 }  // namespace art
 
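For illustration, a minimal sketch of the new interface in use; the bitmap member and the collector context are hypothetical, only the IsMarked() contract comes from this change:

    // Hypothetical visitor for a non-moving collector: objects never move,
    // so IsMarked() returns the input pointer whenever the object is marked.
    class NonMovingIsMarkedVisitor : public IsMarkedVisitor {
     public:
      explicit NonMovingIsMarkedVisitor(MarkBitmap* bitmap)  // MarkBitmap is assumed
          : bitmap_(bitmap) {}
      virtual mirror::Object* IsMarked(mirror::Object* obj) {
        return bitmap_->Test(obj) ? obj : nullptr;
      }
     private:
      MarkBitmap* const bitmap_;
    };

Such a visitor would be handed to, for example, Runtime::SweepSystemWeaks(&visitor), which this change converts from the old IsMarkedCallback/void* pair.
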
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index d08af71..7772354 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -244,10 +244,11 @@
           .AppendValues()
           .IntoKey(M::ImageCompilerOptions)
       .Define("-Xverify:_")
-          .WithType<bool>()
-          .WithValueMap({{"none", false},
-                         {"remote", true},
-                         {"all", true}})
+          .WithType<verifier::VerifyMode>()
+          .WithValueMap({{"none",     verifier::VerifyMode::kNone},
+                         {"remote",   verifier::VerifyMode::kEnable},
+                         {"all",      verifier::VerifyMode::kEnable},
+                         {"softfail", verifier::VerifyMode::kSoftFail}})
           .IntoKey(M::Verify)
       .Define("-XX:NativeBridge=_")
           .WithType<std::string>()
@@ -686,7 +687,7 @@
   UsageMessage(stream, "  -esa\n");
   UsageMessage(stream, "  -dsa\n");
   UsageMessage(stream, "   (-enablesystemassertions, -disablesystemassertions)\n");
-  UsageMessage(stream, "  -Xverify:{none,remote,all}\n");
+  UsageMessage(stream, "  -Xverify:{none,remote,all,softfail}\n");
   UsageMessage(stream, "  -Xrs\n");
   UsageMessage(stream, "  -Xint:portable, -Xint:fast, -Xint:jit\n");
   UsageMessage(stream, "  -Xdexopt:{none,verified,all,full}\n");
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 1aab933..cc8b215 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -75,6 +75,7 @@
 #include "jit/jit.h"
 #include "jni_internal.h"
 #include "linear_alloc.h"
+#include "lambda/box_table.h"
 #include "mirror/array.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
@@ -185,7 +186,7 @@
       system_class_loader_(nullptr),
       dump_gc_performance_on_shutdown_(false),
       preinitialization_transaction_(nullptr),
-      verify_(false),
+      verify_(verifier::VerifyMode::kNone),
       allow_dex_file_fallback_(true),
       target_sdk_version_(0),
       implicit_null_checks_(false),
@@ -403,11 +404,12 @@
   }
 }
 
-void Runtime::SweepSystemWeaks(IsMarkedCallback* visitor, void* arg) {
-  GetInternTable()->SweepInternTableWeaks(visitor, arg);
-  GetMonitorList()->SweepMonitorList(visitor, arg);
-  GetJavaVM()->SweepJniWeakGlobals(visitor, arg);
-  GetHeap()->SweepAllocationRecords(visitor, arg);
+void Runtime::SweepSystemWeaks(IsMarkedVisitor* visitor) {
+  GetInternTable()->SweepInternTableWeaks(visitor);
+  GetMonitorList()->SweepMonitorList(visitor);
+  GetJavaVM()->SweepJniWeakGlobals(visitor);
+  GetHeap()->SweepAllocationRecords(visitor);
+  GetLambdaBoxTable()->SweepWeakBoxedLambdas(visitor);
 }
 
 bool Runtime::Create(const RuntimeOptions& options, bool ignore_unrecognized) {
@@ -912,6 +914,9 @@
     jit_options_->SetUseJIT(false);
   }
 
+  // Allocate a global table of boxed lambda objects <-> closures.
+  lambda_box_table_ = MakeUnique<lambda::BoxTable>();
+
   // Use MemMap arena pool for jit, malloc otherwise. Malloc arenas are faster to allocate but
   // can't be trimmed as easily.
   const bool use_malloc = IsAotCompiler();
@@ -1497,24 +1502,27 @@
 
 void Runtime::DisallowNewSystemWeaks() {
   monitor_list_->DisallowNewMonitors();
-  intern_table_->DisallowNewInterns();
+  intern_table_->ChangeWeakRootState(gc::kWeakRootStateNoReadsOrWrites);
   java_vm_->DisallowNewWeakGlobals();
   heap_->DisallowNewAllocationRecords();
+  lambda_box_table_->DisallowNewWeakBoxedLambdas();
 }
 
 void Runtime::AllowNewSystemWeaks() {
   monitor_list_->AllowNewMonitors();
-  intern_table_->AllowNewInterns();
+  intern_table_->ChangeWeakRootState(gc::kWeakRootStateNormal);  // TODO: Do this in the sweeping?
   java_vm_->AllowNewWeakGlobals();
   heap_->AllowNewAllocationRecords();
+  lambda_box_table_->AllowNewWeakBoxedLambdas();
 }
 
 void Runtime::EnsureNewSystemWeaksDisallowed() {
   // Lock and unlock the system weak locks once to ensure that no
   // threads are still in the middle of adding new system weaks.
   monitor_list_->EnsureNewMonitorsDisallowed();
-  intern_table_->EnsureNewInternsDisallowed();
+  intern_table_->EnsureNewWeakInternsDisallowed();
   java_vm_->EnsureNewWeakGlobalsDisallowed();
+  lambda_box_table_->EnsureNewWeakBoxedLambdasDisallowed();
 }
 
 void Runtime::BroadcastForNewSystemWeaks() {
@@ -1757,4 +1765,12 @@
   imt_unimplemented_method_ = method;
 }
 
+bool Runtime::IsVerificationEnabled() const {
+  return verify_ == verifier::VerifyMode::kEnable;
+}
+
+bool Runtime::IsVerificationSoftFail() const {
+  return verify_ == verifier::VerifyMode::kSoftFail;
+}
+
 }  // namespace art
diff --git a/runtime/runtime.h b/runtime/runtime.h
index c1fa55a..55adaf1 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -53,6 +53,10 @@
   class JitOptions;
 }  // namespace jit
 
+namespace lambda {
+  class BoxTable;
+}  // namespace lambda
+
 namespace mirror {
   class ClassLoader;
   class Array;
@@ -64,6 +68,7 @@
 }  // namespace mirror
 namespace verifier {
   class MethodVerifier;
+  enum class VerifyMode : int8_t;
 }  // namespace verifier
 class ArenaPool;
 class ArtMethod;
@@ -336,7 +341,7 @@
 
   // Sweep system weaks: a system weak is deleted if the visitor returns null; otherwise, the
   // system weak is updated to be the visitor's returned value.
-  void SweepSystemWeaks(IsMarkedCallback* visitor, void* arg)
+  void SweepSystemWeaks(IsMarkedVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Constant roots are the roots which never change after the runtime is initialized, they only
@@ -500,9 +505,8 @@
     return !implicit_so_checks_;
   }
 
-  bool IsVerificationEnabled() const {
-    return verify_;
-  }
+  bool IsVerificationEnabled() const;
+  bool IsVerificationSoftFail() const;
 
   bool IsDexFileFallbackEnabled() const {
     return allow_dex_file_fallback_;
@@ -532,6 +536,10 @@
     return experimental_lambdas_;
   }
 
+  lambda::BoxTable* GetLambdaBoxTable() const {
+    return lambda_box_table_.get();
+  }
+
   // Create the JIT and instrumentation and code cache.
   void CreateJit();
 
@@ -646,6 +654,8 @@
   std::unique_ptr<jit::Jit> jit_;
   std::unique_ptr<jit::JitOptions> jit_options_;
 
+  std::unique_ptr<lambda::BoxTable> lambda_box_table_;
+
   // Fault message, printed when we get a SIGSEGV.
   Mutex fault_message_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::string fault_message_ GUARDED_BY(fault_message_lock_);
@@ -700,8 +710,8 @@
   // Transaction used for pre-initializing classes at compilation time.
   Transaction* preinitialization_transaction_;
 
-  // If false, verification is disabled. True by default.
-  bool verify_;
+  // If kNone, verification is disabled. kEnable by default.
+  verifier::VerifyMode verify_;
 
   // If true, the runtime may use dex files directly with the interpreter if an oat file is not
   // available/usable.
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index dc4c0c7..9922c5f 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -106,7 +106,8 @@
                                           CompilerOptions)  // -Xcompiler-option ...
 RUNTIME_OPTIONS_KEY (std::vector<std::string>, \
                                           ImageCompilerOptions)  // -Ximage-compiler-option ...
-RUNTIME_OPTIONS_KEY (bool,                Verify,                         true)
+RUNTIME_OPTIONS_KEY (verifier::VerifyMode, \
+                                          Verify,                         verifier::VerifyMode::kEnable)
 RUNTIME_OPTIONS_KEY (std::string,         NativeBridge)
 RUNTIME_OPTIONS_KEY (unsigned int,        ZygoteMaxFailedBoots,           10)
 RUNTIME_OPTIONS_KEY (Unit,                NoDexFileFallback)
diff --git a/runtime/runtime_options.h b/runtime/runtime_options.h
index 7e59000..88ac00a 100644
--- a/runtime/runtime_options.h
+++ b/runtime/runtime_options.h
@@ -32,6 +32,7 @@
 #include "gc/space/large_object_space.h"
 #include "profiler_options.h"
 #include "arch/instruction_set.h"
+#include "verifier/verify_mode.h"
 #include <stdio.h>
 #include <stdarg.h>
 
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 6f3b0a3..fede91c 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -904,7 +904,7 @@
       CHECK_EQ(runtime->GetClassLinker()->GetImagePointerSize(), pointer_size);
     }
   }
-  DCHECK_EQ(frame_size & (kStackAlignment - 1), 0U);
+  DCHECK_ALIGNED(frame_size, kStackAlignment);
   DCHECK_NE(reg, -1);
   int spill_size = POPCOUNT(core_spills) * GetBytesPerGprSpillLocation(isa)
       + POPCOUNT(fp_spills) * GetBytesPerFprSpillLocation(isa)
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index 962132b..5544507 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -27,16 +27,17 @@
 constexpr uint32_t StackMap::kNoDexRegisterMap;
 constexpr uint32_t StackMap::kNoInlineInfo;
 
-DexRegisterLocation::Kind DexRegisterMap::GetLocationInternalKind(uint16_t dex_register_number,
-                                                                  uint16_t number_of_dex_registers,
-                                                                  const CodeInfo& code_info,
-                                                                  const StackMapEncoding& enc) const {
+DexRegisterLocation::Kind DexRegisterMap::GetLocationInternalKind(
+    uint16_t dex_register_number,
+    uint16_t number_of_dex_registers,
+    const CodeInfo& code_info,
+    const StackMapEncoding& enc) const {
   DexRegisterLocationCatalog dex_register_location_catalog =
       code_info.GetDexRegisterLocationCatalog(enc);
   size_t location_catalog_entry_index = GetLocationCatalogEntryIndex(
       dex_register_number,
       number_of_dex_registers,
-      code_info.GetNumberOfDexRegisterLocationCatalogEntries());
+      code_info.GetNumberOfLocationCatalogEntries());
   return dex_register_location_catalog.GetLocationInternalKind(location_catalog_entry_index);
 }
 
@@ -49,7 +50,7 @@
   size_t location_catalog_entry_index = GetLocationCatalogEntryIndex(
       dex_register_number,
       number_of_dex_registers,
-      code_info.GetNumberOfDexRegisterLocationCatalogEntries());
+      code_info.GetNumberOfLocationCatalogEntries());
   return dex_register_location_catalog.GetDexRegisterLocation(location_catalog_entry_index);
 }
 
@@ -140,8 +141,7 @@
 void DexRegisterLocationCatalog::Dump(VariableIndentationOutputStream* vios,
                                       const CodeInfo& code_info) {
   StackMapEncoding encoding = code_info.ExtractEncoding();
-  size_t number_of_location_catalog_entries =
-      code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+  size_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
   size_t location_catalog_size_in_bytes = code_info.GetDexRegisterLocationCatalogSize(encoding);
   vios->Stream()
       << "DexRegisterLocationCatalog (number_of_entries=" << number_of_location_catalog_entries
@@ -157,8 +157,7 @@
                           const CodeInfo& code_info,
                           uint16_t number_of_dex_registers) const {
   StackMapEncoding encoding = code_info.ExtractEncoding();
-  size_t number_of_location_catalog_entries =
-      code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+  size_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
   // TODO: Display the bit mask of live Dex registers.
   for (size_t j = 0; j < number_of_dex_registers; ++j) {
     if (IsDexRegisterLive(j)) {
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index e8769f9..0d3816b 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -23,6 +23,12 @@
 
 namespace art {
 
+#define ELEMENT_BYTE_OFFSET_AFTER(PreviousElement) \
+  k ## PreviousElement ## Offset + sizeof(PreviousElement ## Type)
+
+#define ELEMENT_BIT_OFFSET_AFTER(PreviousElement) \
+  k ## PreviousElement ## BitOffset + PreviousElement ## BitSize
+
 class VariableIndentationOutputStream;
 
 // Size of a frame slot, in bytes.  This constant is a signed value,
@@ -33,6 +39,11 @@
 // Size of Dex virtual registers.
 static constexpr size_t kVRegSize = 4;
 
+// We use 3 bits to encode the number of bytes needed for writing a value
+// (3 bits can represent up to 8 distinct values), for values that we know
+// fit in 32 bits.
+static constexpr size_t kNumberOfBitForNumberOfBytesForEncoding = 3;
+
 class CodeInfo;
 class StackMapEncoding;
 
@@ -190,7 +201,9 @@
 /**
  * Store information on unique Dex register locations used in a method.
  * The information is of the form:
- * [DexRegisterLocation+].
+ *
+ *   [DexRegisterLocation+].
+ *
  * DexRegisterLocations are either 1- or 5-byte wide (see art::DexRegisterLocation::Kind).
  */
 class DexRegisterLocationCatalog {
@@ -427,7 +440,9 @@
 /* Information on Dex register locations for a specific PC, mapping a
  * stack map's Dex register to a location entry in a DexRegisterLocationCatalog.
  * The information is of the form:
- * [live_bit_mask, entries*]
+ *
+ *   [live_bit_mask, entries*]
+ *
  * where entries are concatenated unsigned integer values encoded on a number
  * of bits (fixed per DexRegisterMap instances of a CodeInfo object) depending
  * on the number of entries in the Dex register location catalog
@@ -752,8 +767,9 @@
  * - Knowing the values of dex registers.
  *
  * The information is of the form:
- * [dex_pc, native_pc_offset, dex_register_map_offset, inlining_info_offset, register_mask,
- * stack_mask].
+ *
+ *   [dex_pc, native_pc_offset, dex_register_map_offset, inlining_info_offset, register_mask,
+ *   stack_mask].
  */
 class StackMap {
  public:
@@ -856,8 +872,6 @@
   static constexpr uint32_t kNoInlineInfo = -1;
 
  private:
-  // TODO: Instead of plain types such as "uint32_t", introduce
-  // typedefs (and document the memory layout of StackMap).
   static constexpr int kFixedSize = 0;
 
   // Loads `number_of_bytes` at the given `offset` and assemble a uint32_t. If `check_max` is true,
@@ -872,61 +886,74 @@
 
 /**
  * Inline information for a specific PC. The information is of the form:
- * [inlining_depth, [dex_pc, method_index, dex_register_map_offset]+]
+ *
+ *   [inlining_depth, entry+]
+ *
+ * where `entry` is of the form:
+ *
+ *   [dex_pc, method_index, dex_register_map_offset].
  */
 class InlineInfo {
  public:
+  // Memory layout: fixed contents.
+  typedef uint8_t DepthType;
+  // Memory layout: single entry contents.
+  typedef uint32_t MethodIndexType;
+  typedef uint32_t DexPcType;
+  typedef uint8_t InvokeTypeType;
+  typedef uint32_t DexRegisterMapType;
+
   explicit InlineInfo(MemoryRegion region) : region_(region) {}
 
-  uint8_t GetDepth() const {
-    return region_.LoadUnaligned<uint8_t>(kDepthOffset);
+  DepthType GetDepth() const {
+    return region_.LoadUnaligned<DepthType>(kDepthOffset);
   }
 
-  void SetDepth(uint8_t depth) {
-    region_.StoreUnaligned<uint8_t>(kDepthOffset, depth);
+  void SetDepth(DepthType depth) {
+    region_.StoreUnaligned<DepthType>(kDepthOffset, depth);
   }
 
-  uint32_t GetMethodIndexAtDepth(uint8_t depth) const {
-    return region_.LoadUnaligned<uint32_t>(
+  MethodIndexType GetMethodIndexAtDepth(DepthType depth) const {
+    return region_.LoadUnaligned<MethodIndexType>(
         kFixedSize + depth * SingleEntrySize() + kMethodIndexOffset);
   }
 
-  void SetMethodIndexAtDepth(uint8_t depth, uint32_t index) {
-    region_.StoreUnaligned<uint32_t>(
+  void SetMethodIndexAtDepth(DepthType depth, MethodIndexType index) {
+    region_.StoreUnaligned<MethodIndexType>(
         kFixedSize + depth * SingleEntrySize() + kMethodIndexOffset, index);
   }
 
-  uint32_t GetDexPcAtDepth(uint8_t depth) const {
-    return region_.LoadUnaligned<uint32_t>(
+  DexPcType GetDexPcAtDepth(DepthType depth) const {
+    return region_.LoadUnaligned<DexPcType>(
         kFixedSize + depth * SingleEntrySize() + kDexPcOffset);
   }
 
-  void SetDexPcAtDepth(uint8_t depth, uint32_t dex_pc) {
-    region_.StoreUnaligned<uint32_t>(
+  void SetDexPcAtDepth(DepthType depth, DexPcType dex_pc) {
+    region_.StoreUnaligned<DexPcType>(
         kFixedSize + depth * SingleEntrySize() + kDexPcOffset, dex_pc);
   }
 
-  uint8_t GetInvokeTypeAtDepth(uint8_t depth) const {
-    return region_.LoadUnaligned<uint8_t>(
+  InvokeTypeType GetInvokeTypeAtDepth(DepthType depth) const {
+    return region_.LoadUnaligned<InvokeTypeType>(
         kFixedSize + depth * SingleEntrySize() + kInvokeTypeOffset);
   }
 
-  void SetInvokeTypeAtDepth(uint8_t depth, uint8_t invoke_type) {
-    region_.StoreUnaligned<uint8_t>(
+  void SetInvokeTypeAtDepth(DepthType depth, InvokeTypeType invoke_type) {
+    region_.StoreUnaligned<InvokeTypeType>(
         kFixedSize + depth * SingleEntrySize() + kInvokeTypeOffset, invoke_type);
   }
 
-  uint32_t GetDexRegisterMapOffsetAtDepth(uint8_t depth) const {
-    return region_.LoadUnaligned<uint32_t>(
+  DexRegisterMapType GetDexRegisterMapOffsetAtDepth(DepthType depth) const {
+    return region_.LoadUnaligned<DexRegisterMapType>(
         kFixedSize + depth * SingleEntrySize() + kDexRegisterMapOffset);
   }
 
-  void SetDexRegisterMapOffsetAtDepth(uint8_t depth, uint32_t offset) {
-    region_.StoreUnaligned<uint32_t>(
+  void SetDexRegisterMapOffsetAtDepth(DepthType depth, DexRegisterMapType offset) {
+    region_.StoreUnaligned<DexRegisterMapType>(
         kFixedSize + depth * SingleEntrySize() + kDexRegisterMapOffset, offset);
   }
 
-  bool HasDexRegisterMapAtDepth(uint8_t depth) const {
+  bool HasDexRegisterMapAtDepth(DepthType depth) const {
     return GetDexRegisterMapOffsetAtDepth(depth) != StackMap::kNoDexRegisterMap;
   }
 
@@ -937,17 +964,16 @@
   void Dump(VariableIndentationOutputStream* vios,
             const CodeInfo& info, uint16_t* number_of_dex_registers) const;
 
+
  private:
-  // TODO: Instead of plain types such as "uint8_t", introduce
-  // typedefs (and document the memory layout of InlineInfo).
   static constexpr int kDepthOffset = 0;
-  static constexpr int kFixedSize = kDepthOffset + sizeof(uint8_t);
+  static constexpr int kFixedSize = ELEMENT_BYTE_OFFSET_AFTER(Depth);
 
   static constexpr int kMethodIndexOffset = 0;
-  static constexpr int kDexPcOffset = kMethodIndexOffset + sizeof(uint32_t);
-  static constexpr int kInvokeTypeOffset = kDexPcOffset + sizeof(uint32_t);
-  static constexpr int kDexRegisterMapOffset = kInvokeTypeOffset + sizeof(uint8_t);
-  static constexpr int kFixedEntrySize = kDexRegisterMapOffset + sizeof(uint32_t);
+  static constexpr int kDexPcOffset = ELEMENT_BYTE_OFFSET_AFTER(MethodIndex);
+  static constexpr int kInvokeTypeOffset = ELEMENT_BYTE_OFFSET_AFTER(DexPc);
+  static constexpr int kDexRegisterMapOffset = ELEMENT_BYTE_OFFSET_AFTER(InvokeType);
+  static constexpr int kFixedEntrySize = ELEMENT_BYTE_OFFSET_AFTER(DexRegisterMap);
 
   MemoryRegion region_;
 
@@ -959,11 +985,32 @@
 /**
  * Wrapper around all compiler information collected for a method.
  * The information is of the form:
- * [overall_size, number_of_location_catalog_entries, number_of_stack_maps, stack_mask_size,
- * DexRegisterLocationCatalog+, StackMap+, DexRegisterMap+, InlineInfo*].
+ *
+ *   [overall_size, encoding_info, number_of_location_catalog_entries, number_of_stack_maps,
+ *   stack_mask_size, DexRegisterLocationCatalog+, StackMap+, DexRegisterMap+, InlineInfo*]
+ *
+ * where `encoding_info` is of the form:
+ *
+ *   [has_inline_info, inline_info_size_in_bytes, dex_register_map_size_in_bytes,
+ *   dex_pc_size_in_bytes, native_pc_size_in_bytes, register_mask_size_in_bytes].
  */
 class CodeInfo {
  public:
+  // Memory layout: fixed contents.
+  typedef uint32_t OverallSizeType;
+  typedef uint16_t EncodingInfoType;
+  typedef uint32_t NumberOfLocationCatalogEntriesType;
+  typedef uint32_t NumberOfStackMapsType;
+  typedef uint32_t StackMaskSizeType;
+
+  // Memory (bit) layout: encoding info.
+  static constexpr int HasInlineInfoBitSize = 1;
+  static constexpr int InlineInfoBitSize = kNumberOfBitForNumberOfBytesForEncoding;
+  static constexpr int DexRegisterMapBitSize = kNumberOfBitForNumberOfBytesForEncoding;
+  static constexpr int DexPcBitSize = kNumberOfBitForNumberOfBytesForEncoding;
+  static constexpr int NativePcBitSize = kNumberOfBitForNumberOfBytesForEncoding;
+  static constexpr int RegisterMaskBitSize = kNumberOfBitForNumberOfBytesForEncoding;
+
   explicit CodeInfo(MemoryRegion region) : region_(region) {}
 
   explicit CodeInfo(const void* data) {
@@ -991,17 +1038,11 @@
   }
 
   void SetEncodingAt(size_t bit_offset, size_t number_of_bytes) {
-    // We encode the number of bytes needed for writing a value on 3 bits,
-    // for values that we know are maximum 32bits.
-    region_.StoreBit(bit_offset, (number_of_bytes & 1));
-    region_.StoreBit(bit_offset + 1, (number_of_bytes & 2));
-    region_.StoreBit(bit_offset + 2, (number_of_bytes & 4));
+    region_.StoreBits(bit_offset, number_of_bytes, kNumberOfBitForNumberOfBytesForEncoding);
   }
 
   size_t GetNumberOfBytesForEncoding(size_t bit_offset) const {
-    return region_.LoadBit(bit_offset)
-        + (region_.LoadBit(bit_offset + 1) << 1)
-        + (region_.LoadBit(bit_offset + 2) << 2);
+    return region_.LoadBits(bit_offset, kNumberOfBitForNumberOfBytesForEncoding);
   }
 
   bool HasInlineInfo() const {
@@ -1019,33 +1060,35 @@
     return StackMap(GetStackMaps(encoding).Subregion(i * stack_map_size, stack_map_size));
   }
 
-  uint32_t GetOverallSize() const {
-    return region_.LoadUnaligned<uint32_t>(kOverallSizeOffset);
+  OverallSizeType GetOverallSize() const {
+    return region_.LoadUnaligned<OverallSizeType>(kOverallSizeOffset);
   }
 
-  void SetOverallSize(uint32_t size) {
-    region_.StoreUnaligned<uint32_t>(kOverallSizeOffset, size);
+  void SetOverallSize(OverallSizeType size) {
+    region_.StoreUnaligned<OverallSizeType>(kOverallSizeOffset, size);
   }
 
-  uint32_t GetNumberOfDexRegisterLocationCatalogEntries() const {
-    return region_.LoadUnaligned<uint32_t>(kNumberOfDexRegisterLocationCatalogEntriesOffset);
+  NumberOfLocationCatalogEntriesType GetNumberOfLocationCatalogEntries() const {
+    return region_.LoadUnaligned<NumberOfLocationCatalogEntriesType>(
+        kNumberOfLocationCatalogEntriesOffset);
   }
 
-  void SetNumberOfDexRegisterLocationCatalogEntries(uint32_t num_entries) {
-    region_.StoreUnaligned<uint32_t>(kNumberOfDexRegisterLocationCatalogEntriesOffset, num_entries);
+  void SetNumberOfLocationCatalogEntries(NumberOfLocationCatalogEntriesType num_entries) {
+    region_.StoreUnaligned<NumberOfLocationCatalogEntriesType>(
+        kNumberOfLocationCatalogEntriesOffset, num_entries);
   }
 
   uint32_t GetDexRegisterLocationCatalogSize(const StackMapEncoding& encoding) const {
     return ComputeDexRegisterLocationCatalogSize(GetDexRegisterLocationCatalogOffset(encoding),
-                                                 GetNumberOfDexRegisterLocationCatalogEntries());
+                                                 GetNumberOfLocationCatalogEntries());
   }
 
-  size_t GetNumberOfStackMaps() const {
-    return region_.LoadUnaligned<uint32_t>(kNumberOfStackMapsOffset);
+  NumberOfStackMapsType GetNumberOfStackMaps() const {
+    return region_.LoadUnaligned<NumberOfStackMapsType>(kNumberOfStackMapsOffset);
   }
 
-  void SetNumberOfStackMaps(uint32_t number_of_stack_maps) {
-    region_.StoreUnaligned<uint32_t>(kNumberOfStackMapsOffset, number_of_stack_maps);
+  void SetNumberOfStackMaps(NumberOfStackMapsType number_of_stack_maps) {
+    region_.StoreUnaligned<NumberOfStackMapsType>(kNumberOfStackMapsOffset, number_of_stack_maps);
   }
 
   // Get the size all the stack maps of this CodeInfo object, in bytes.
@@ -1130,23 +1173,28 @@
             bool dump_stack_maps) const;
 
  private:
-  // TODO: Instead of plain types such as "uint32_t", introduce
-  // typedefs (and document the memory layout of CodeInfo).
   static constexpr int kOverallSizeOffset = 0;
-  static constexpr int kEncodingInfoOffset = kOverallSizeOffset + sizeof(uint32_t);
-  static constexpr int kNumberOfDexRegisterLocationCatalogEntriesOffset =
-      kEncodingInfoOffset + sizeof(uint16_t);
+  static constexpr int kEncodingInfoOffset = ELEMENT_BYTE_OFFSET_AFTER(OverallSize);
+  static constexpr int kNumberOfLocationCatalogEntriesOffset =
+      ELEMENT_BYTE_OFFSET_AFTER(EncodingInfo);
   static constexpr int kNumberOfStackMapsOffset =
-      kNumberOfDexRegisterLocationCatalogEntriesOffset + sizeof(uint32_t);
-  static constexpr int kStackMaskSizeOffset = kNumberOfStackMapsOffset + sizeof(uint32_t);
-  static constexpr int kFixedSize = kStackMaskSizeOffset + sizeof(uint32_t);
+      ELEMENT_BYTE_OFFSET_AFTER(NumberOfLocationCatalogEntries);
+  static constexpr int kStackMaskSizeOffset = ELEMENT_BYTE_OFFSET_AFTER(NumberOfStackMaps);
+  static constexpr int kFixedSize = ELEMENT_BYTE_OFFSET_AFTER(StackMaskSize);
 
-  static constexpr int kHasInlineInfoBitOffset = (kEncodingInfoOffset * kBitsPerByte);
-  static constexpr int kInlineInfoBitOffset = kHasInlineInfoBitOffset + 1;
-  static constexpr int kDexRegisterMapBitOffset = kInlineInfoBitOffset + 3;
-  static constexpr int kDexPcBitOffset = kDexRegisterMapBitOffset + 3;
-  static constexpr int kNativePcBitOffset = kDexPcBitOffset + 3;
-  static constexpr int kRegisterMaskBitOffset = kNativePcBitOffset + 3;
+  static constexpr int kHasInlineInfoBitOffset = kEncodingInfoOffset * kBitsPerByte;
+  static constexpr int kInlineInfoBitOffset = ELEMENT_BIT_OFFSET_AFTER(HasInlineInfo);
+  static constexpr int kDexRegisterMapBitOffset = ELEMENT_BIT_OFFSET_AFTER(InlineInfo);
+  static constexpr int kDexPcBitOffset = ELEMENT_BIT_OFFSET_AFTER(DexRegisterMap);
+  static constexpr int kNativePcBitOffset = ELEMENT_BIT_OFFSET_AFTER(DexPc);
+  static constexpr int kRegisterMaskBitOffset = ELEMENT_BIT_OFFSET_AFTER(NativePc);
+
+  static constexpr int kEncodingInfoPastTheEndBitOffset = ELEMENT_BIT_OFFSET_AFTER(RegisterMask);
+  static constexpr int kEncodingInfoOverallBitSize =
+      kEncodingInfoPastTheEndBitOffset - kHasInlineInfoBitOffset;
+
+  static_assert(kEncodingInfoOverallBitSize <= (sizeof(EncodingInfoType) * kBitsPerByte),
+                "art::CodeInfo::EncodingInfoType is too short to hold all encoding info elements.");
 
   MemoryRegion GetStackMaps(const StackMapEncoding& encoding) const {
     return region_.size() == 0
@@ -1169,7 +1217,7 @@
     size_t number_of_live_dex_registers =
         dex_register_map_without_locations.GetNumberOfLiveDexRegisters(number_of_dex_registers);
     size_t location_mapping_data_size_in_bits =
-        DexRegisterMap::SingleEntrySizeInBits(GetNumberOfDexRegisterLocationCatalogEntries())
+        DexRegisterMap::SingleEntrySizeInBits(GetNumberOfLocationCatalogEntries())
         * number_of_live_dex_registers;
     size_t location_mapping_data_size_in_bytes =
         RoundUp(location_mapping_data_size_in_bits, kBitsPerByte) / kBitsPerByte;
@@ -1211,6 +1259,9 @@
   friend class StackMapStream;
 };
 
+#undef ELEMENT_BYTE_OFFSET_AFTER
+#undef ELEMENT_BIT_OFFSET_AFTER
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_STACK_MAP_H_
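
A worked expansion of the two macros, using the InlineInfo and CodeInfo definitions above (all names and sizes from this change):

    // ELEMENT_BYTE_OFFSET_AFTER(MethodIndex) token-pastes to
    //   kMethodIndexOffset + sizeof(MethodIndexType)
    // so the InlineInfo per-entry layout computes as:
    //   kMethodIndexOffset    = 0
    //   kDexPcOffset          = 0 + sizeof(uint32_t) = 4
    //   kInvokeTypeOffset     = 4 + sizeof(uint32_t) = 8
    //   kDexRegisterMapOffset = 8 + sizeof(uint8_t)  = 9
    //   kFixedEntrySize       = 9 + sizeof(uint32_t) = 13
    //
    // ELEMENT_BIT_OFFSET_AFTER chains the encoding-info bit offsets the same
    // way: kHasInlineInfoBitOffset = 4 * kBitsPerByte = 32, then 33, 36, 39,
    // 42, 45, with kEncodingInfoPastTheEndBitOffset = 48. The overall bit size
    // is 48 - 32 = 16, exactly sizeof(EncodingInfoType) * kBitsPerByte, which
    // is what the static_assert verifies.
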
diff --git a/runtime/thread.cc b/runtime/thread.cc
index cede998..a2edfa3 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1180,6 +1180,7 @@
     if (m->IsRuntimeMethod()) {
       return true;
     }
+    m = m->GetInterfaceMethodIfProxy(sizeof(void*));
     const int kMaxRepetition = 3;
     mirror::Class* c = m->GetDeclaringClass();
     mirror::DexCache* dex_cache = c->GetDexCache();
@@ -2734,4 +2735,12 @@
   tlsPtr_.method_verifier = verifier->link_;
 }
 
+size_t Thread::NumberOfHeldMutexes() const {
+  size_t count = 0;
+  for (BaseMutex* mu : tlsPtr_.held_mutexes) {
+    count += static_cast<size_t>(mu != nullptr);
+  }
+  return count;
+}
+
 }  // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index 7826e62..cf87f22 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -288,6 +288,8 @@
     return tls32_.daemon;
   }
 
+  size_t NumberOfHeldMutexes() const;
+
   bool HoldsLock(mirror::Object*) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 386f81f..60c9b5e 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -381,23 +381,7 @@
   Locks::thread_suspend_count_lock_->AssertNotHeld(self);
   CHECK_NE(self->GetState(), kRunnable);
 
-  std::vector<Thread*> runnable_threads;
-  std::vector<Thread*> other_threads;
-
-  // Suspend all threads once.
-  {
-    MutexLock mu(self, *Locks::thread_list_lock_);
-    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
-    // Update global suspend all state for attaching threads.
-    ++suspend_all_count_;
-    // Increment everybody's suspend count (except our own).
-    for (const auto& thread : list_) {
-      if (thread == self) {
-        continue;
-      }
-      thread->ModifySuspendCount(self, +1, nullptr, false);
-    }
-  }
+  SuspendAllInternal(self, self, nullptr);
 
   // Run the flip callback for the collector.
   Locks::mutator_lock_->ExclusiveLock(self);
@@ -406,6 +390,8 @@
   collector->RegisterPause(NanoTime() - start_time);
 
   // Resume runnable threads.
+  std::vector<Thread*> runnable_threads;
+  std::vector<Thread*> other_threads;
   {
     MutexLock mu(self, *Locks::thread_list_lock_);
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
@@ -585,8 +571,10 @@
 
   // Wait for the barrier to be passed by all runnable threads. This wait
   // is done with a timeout so that we can detect problems.
+#if ART_USE_FUTEXES
   timespec wait_timeout;
   InitTimeSpec(true, CLOCK_MONOTONIC, 10000, 0, &wait_timeout);
+#endif
   while (true) {
     int32_t cur_val = pending_threads.LoadRelaxed();
     if (LIKELY(cur_val > 0)) {
diff --git a/runtime/thread_state.h b/runtime/thread_state.h
index c7ea7f4..c000e61 100644
--- a/runtime/thread_state.h
+++ b/runtime/thread_state.h
@@ -43,6 +43,7 @@
   kWaitingForMethodTracingStart,    // WAITING        TS_WAIT      waiting for method tracing to start
   kWaitingForVisitObjects,          // WAITING        TS_WAIT      waiting for visiting objects
   kWaitingForGetObjectsAllocated,   // WAITING        TS_WAIT      waiting to get the number of allocated objects
+  kWaitingWeakRootRead,             // WAITING        TS_WAIT      waiting to read a weak root
   kStarting,                        // NEW            TS_WAIT      native thread started, not yet ready to run managed code
   kNative,                          // RUNNABLE       TS_RUNNING   running in a JNI native method
   kSuspended,                       // RUNNABLE       TS_RUNNING   suspended by GC or debugger
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 11c3e65..8c950a0 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -329,14 +329,21 @@
   } else {
     // Bad method data.
     CHECK_NE(verifier.failures_.size(), 0U);
-    CHECK(verifier.have_pending_hard_failure_);
-    verifier.DumpFailures(LOG(INFO) << "Verification error in "
-                                    << PrettyMethod(method_idx, *dex_file) << "\n");
+
+    if (UNLIKELY(verifier.have_pending_experimental_failure_)) {
+      // Failed due to being forced into interpreter. This is ok because
+      // we just want to skip verification.
+      result = kSoftFailure;
+    } else {
+      CHECK(verifier.have_pending_hard_failure_);
+      verifier.DumpFailures(LOG(INFO) << "Verification error in "
+                                      << PrettyMethod(method_idx, *dex_file) << "\n");
+      result = kHardFailure;
+    }
     if (gDebugVerify) {
       std::cout << "\n" << verifier.info_messages_.str();
       verifier.Dump(std::cout);
     }
-    result = kHardFailure;
   }
   if (kTimeVerifyMethod) {
     uint64_t duration_ns = NanoTime() - start_ns;
@@ -402,6 +409,7 @@
       monitor_enter_dex_pcs_(nullptr),
       have_pending_hard_failure_(false),
       have_pending_runtime_throw_failure_(false),
+      have_pending_experimental_failure_(false),
       have_any_pending_runtime_throw_failure_(false),
       new_instance_count_(0),
       monitor_enter_count_(0),
@@ -813,6 +821,17 @@
 }
 
 bool MethodVerifier::VerifyInstruction(const Instruction* inst, uint32_t code_offset) {
+  if (UNLIKELY(inst->IsExperimental())) {
+    // Experimental instructions don't have verifier support yet. While it is
+    // possible to use them on their own, the data flow analysis will fail as
+    // soon as a stable instruction uses a virtual register that was created
+    // by an experimental instruction.
+    Fail(VERIFY_ERROR_FORCE_INTERPRETER)
+        << "experimental instruction is not supported by verifier; skipping verification";
+    have_pending_experimental_failure_ = true;
+    return false;
+  }
+
   bool result = true;
   switch (inst->GetVerifyTypeArgumentA()) {
     case Instruction::kVerifyRegA:
@@ -2946,6 +2965,12 @@
       // If the code would've normally hard-failed, then the interpreter will throw the
       // appropriate verification errors at runtime.
       Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement box-lambda verification
+
+      // Partial verification: set the resulting type to java.lang.Object, which
+      // is good enough for the remaining verification to proceed without hard-failing.
+      const uint32_t vreg_target_object = inst->VRegA_22x();  // box-lambda vA, vB
+      const RegType& reg_type = reg_types_.JavaLangObject(need_precise_constants_);
+      work_line_->SetRegisterType(this, vreg_target_object, reg_type);
       break;
     }
 
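In summary, the failure routing after this change (read directly from the hunks above):

    // In the enclosing verify routine, when there are failures:
    //   have_pending_experimental_failure_ set  -> result = kSoftFailure
    //     (verification is skipped; the method runs in the interpreter)
    //   otherwise have_pending_hard_failure_ is CHECKed -> result = kHardFailure
    //     (the failures are dumped to the log)
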
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index d933448..a2835f5 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -736,6 +736,8 @@
   // instructions that would hard fail the verification.
   // Note: this flag is reset after processing each instruction.
   bool have_pending_runtime_throw_failure_;
+  // Is there a pending experimental failure?
+  bool have_pending_experimental_failure_;
 
   // A version of the above that is not reset and thus captures if there were *any* throw failures.
   bool have_any_pending_runtime_throw_failure_;
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 1435607..6e23234 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -583,24 +583,20 @@
 
 const RegType& RegType::Merge(const RegType& incoming_type, RegTypeCache* reg_types) const {
   DCHECK(!Equals(incoming_type));  // Trivial equality handled by caller
-  // Perform pointer equality tests for conflict to avoid virtual method dispatch.
+  // Perform pointer equality tests for undefined and conflict to avoid virtual method dispatch.
+  const UndefinedType& undefined = reg_types->Undefined();
   const ConflictType& conflict = reg_types->Conflict();
-  if (IsUndefined() || incoming_type.IsUndefined()) {
+  DCHECK_EQ(this == &undefined, IsUndefined());
+  DCHECK_EQ(&incoming_type == &undefined, incoming_type.IsUndefined());
+  DCHECK_EQ(this == &conflict, IsConflict());
+  DCHECK_EQ(&incoming_type == &conflict, incoming_type.IsConflict());
+  if (this == &undefined || &incoming_type == &undefined) {
     // There is a difference between undefined and conflict. Conflicts may be copied around, but
     // not used. Undefined registers must not be copied. So any merge with undefined should return
     // undefined.
-    if (IsUndefined()) {
-      return *this;
-    }
-    return incoming_type;
-  } else if (this == &conflict) {
-    DCHECK(IsConflict());
-    return *this;  // Conflict MERGE * => Conflict
-  } else if (&incoming_type == &conflict) {
-    DCHECK(incoming_type.IsConflict());
-    return incoming_type;  // * MERGE Conflict => Conflict
-  } else if (IsUndefined() || incoming_type.IsUndefined()) {
-    return conflict;  // Unknown MERGE * => Conflict
+    return undefined;
+  } else if (this == &conflict || &incoming_type == &conflict) {
+    return conflict;  // (Conflict MERGE *) or (* MERGE Conflict) => Conflict
   } else if (IsConstant() && incoming_type.IsConstant()) {
     const ConstantType& type1 = *down_cast<const ConstantType*>(this);
     const ConstantType& type2 = *down_cast<const ConstantType*>(&incoming_type);
@@ -694,6 +690,11 @@
   } else if (IsReferenceTypes() && incoming_type.IsReferenceTypes()) {
     if (IsZero() || incoming_type.IsZero()) {
       return SelectNonConstant(*this, incoming_type);  // 0 MERGE ref => ref
+    } else if (IsUninitializedTypes() || incoming_type.IsUninitializedTypes()) {
+      // Something that is uninitialized hasn't had its constructor called. Uninitialized types
+      // are special: they may only ever be merged with themselves (this must be taken care of
+      // by the caller of Merge(); see the DCHECK on entry). So mark any other merge as conflicting.
+      return conflict;
     } else if (IsJavaLangObject() || incoming_type.IsJavaLangObject()) {
       return reg_types->JavaLangObject(false);  // Object MERGE ref => Object
     } else if (IsUnresolvedTypes() || incoming_type.IsUnresolvedTypes()) {
@@ -702,11 +703,6 @@
       // type that reflects our lack of knowledge and that allows the rest of the unresolved
       // mechanics to continue.
       return reg_types->FromUnresolvedMerge(*this, incoming_type);
-    } else if (IsUninitializedTypes() || incoming_type.IsUninitializedTypes()) {
-      // Something that is uninitialized hasn't had its constructor called. Mark any merge
-      // of this type with something that is initialized as conflicting. The cases of a merge
-      // with itself, 0 or Object are handled above.
-      return conflict;
     } else {  // Two reference types, compute Join
       mirror::Class* c1 = GetClass();
       mirror::Class* c2 = incoming_type.GetClass();
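
The reordered reference-merge rules can be summarized as a small lattice table (derived from the code above; 0 denotes the null constant):

    // this       MERGE incoming       => result
    // Undefined  MERGE *              => Undefined  (pointer-compared fast path)
    // Conflict   MERGE *              => Conflict
    // 0          MERGE ref            => ref        (SelectNonConstant)
    // Uninit(T)  MERGE anything else  => Conflict   (now checked before Object
    //                                                and unresolved types)
    // Object     MERGE ref            => Object
    // Unresolved MERGE ref            => unresolved merge type
    // ref A      MERGE ref B          => Join(A, B)
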
diff --git a/runtime/verifier/verify_mode.h b/runtime/verifier/verify_mode.h
new file mode 100644
index 0000000..bea4378
--- /dev/null
+++ b/runtime/verifier/verify_mode.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_VERIFIER_VERIFY_MODE_H_
+#define ART_RUNTIME_VERIFIER_VERIFY_MODE_H_
+
+#include <stdint.h>
+
+namespace art {
+namespace verifier {
+
+// The mode that the verifier should run as.
+enum class VerifyMode : int8_t {
+  kNone,      // Everything is assumed verified.
+  kEnable,    // Standard verification, try pre-verifying at compile-time.
+  kSoftFail,  // Force a soft fail, punting to the interpreter with access checks.
+};
+
+}  // namespace verifier
+}  // namespace art
+
+#endif  // ART_RUNTIME_VERIFIER_VERIFY_MODE_H_
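
A hedged sketch of how the runtime consults the new mode, using the accessors added to Runtime above; the call site itself is hypothetical:

    // Hypothetical check site, e.g. when deciding how to treat a method.
    Runtime* runtime = Runtime::Current();
    if (!runtime->IsVerificationEnabled()) {
      // verify_ is kNone or kSoftFail: skip full ahead-of-time verification.
    }
    if (runtime->IsVerificationSoftFail()) {
      // -Xverify:softfail: force a soft failure so the method is interpreted
      // with access checks instead of relying on compiled code.
    }
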
diff --git a/test/011-array-copy/src/Main.java b/test/011-array-copy/src/Main.java
index 505d8b0..96e1dbf 100644
--- a/test/011-array-copy/src/Main.java
+++ b/test/011-array-copy/src/Main.java
@@ -23,6 +23,7 @@
     public static void main(String args[]) {
         testObjectCopy();
         testOverlappingMoves();
+        testFloatAndDouble();
     }
 
     public static void testObjectCopy() {
@@ -143,4 +144,13 @@
         /* copy forward, mixed alignment, trivial length */
         makeCopies(0, 5, 1);
     }
+
+    private static void testFloatAndDouble() {
+        // Float & double copies have the same implementation as int & long. However, there are
+        // protective DCHECKs in the code (there is nothing unifying like ByteSizedArray or
+        // ShortSizedArray). Just test that we don't fail those checks.
+        final int len = 10;
+        System.arraycopy(new float[len], 0, new float[len], 0, len);
+        System.arraycopy(new double[len], 0, new double[len], 0, len);
+    }
 }
diff --git a/test/140-dce-regression/expected.txt b/test/140-dce-regression/expected.txt
new file mode 100644
index 0000000..863339f
--- /dev/null
+++ b/test/140-dce-regression/expected.txt
@@ -0,0 +1 @@
+Passed
diff --git a/test/140-dce-regression/info.txt b/test/140-dce-regression/info.txt
new file mode 100644
index 0000000..de6ad34
--- /dev/null
+++ b/test/140-dce-regression/info.txt
@@ -0,0 +1 @@
+Regression test for quick dead code elimination.
diff --git a/test/140-dce-regression/src/Main.java b/test/140-dce-regression/src/Main.java
new file mode 100644
index 0000000..f255029
--- /dev/null
+++ b/test/140-dce-regression/src/Main.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+    public static void testArrayLength() {
+      int[] arr = null;
+      int len = 0;
+      try {
+        len = arr.length;
+        len = 5;
+      } catch (NullPointerException npe) {
+        System.out.println("Passed");
+      }
+    }
+
+    public static void main(String[] args) {
+      testArrayLength();
+    }
+}
diff --git a/test/140-field-packing/expected.txt b/test/140-field-packing/expected.txt
new file mode 100644
index 0000000..2b0a2ce
--- /dev/null
+++ b/test/140-field-packing/expected.txt
@@ -0,0 +1,2 @@
+running test...
+test completed.
diff --git a/test/140-field-packing/info.txt b/test/140-field-packing/info.txt
new file mode 100644
index 0000000..a28bd04
--- /dev/null
+++ b/test/140-field-packing/info.txt
@@ -0,0 +1 @@
+Test field packing for classes with various arrangements of fields.
diff --git a/test/140-field-packing/src/GapOrder.java b/test/140-field-packing/src/GapOrder.java
new file mode 100644
index 0000000..09d09b8
--- /dev/null
+++ b/test/140-field-packing/src/GapOrder.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Regression test for 22460222, the subclass.
+// The field gaps order was wrong. If there were two gaps of different sizes,
+// and the larger one was needed, it wouldn't be found.
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+
+class GapOrder extends GapOrderBase {
+  // The base class is 9 bytes. The entire class should be packed as:
+  //
+  //    00: oooo oooo
+  //    08: b-ss rrrr
+  //    16: rrrr iiii
+  //    24: dddd dddd
+  //
+  // The problem was that the packer wasn't finding the gap where iiii should go,
+  // because the gap where ss goes was given priority. Instead it packed as:
+  //    00: oooo oooo
+  //    08: b--- rrrr
+  //    16: rrrr ----
+  //    24: dddd dddd
+  //    32: iiii ss
+  public Object r1;
+  public Object r2;
+  public double d;
+  public int i;
+  public short s;
+
+  static private void CheckField(String fieldName, int expected) {
+    Field field = null;
+    try {
+      field = GapOrder.class.getField(fieldName);
+    } catch (ReflectiveOperationException e) {
+      System.out.println(fieldName + " not found in GapOrder.");
+      return;
+    }
+
+    int actual = -1;
+    try {
+      Method getOffset = Field.class.getMethod("getOffset");
+      actual = (Integer)getOffset.invoke(field);
+    } catch (ReflectiveOperationException e) {
+      System.out.println("Unable to get field offset for " + fieldName + ":" + e);
+      return;
+    }
+
+    if (actual != expected) {
+      System.out.println(
+          String.format("GapOrder.%s has offset %d, but expected %d",
+            fieldName, actual, expected));
+    }
+  }
+
+  public static void Check() {
+    CheckField("r1", 12);
+    CheckField("r2", 16);
+    CheckField("d", 24);
+    CheckField("i", 20);
+    CheckField("s", 10);
+  }
+}
+
diff --git a/test/140-field-packing/src/GapOrderBase.java b/test/140-field-packing/src/GapOrderBase.java
new file mode 100644
index 0000000..4a0b378
--- /dev/null
+++ b/test/140-field-packing/src/GapOrderBase.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Regression test for 22460222, the base class.
+// The ordering of field gaps was wrong: if there were two gaps of different
+// sizes and the larger one was needed, it wouldn't be found.
+
+// This class has a size of 9 bytes: 8 for object plus 1 for the field 'b'.
+class GapOrderBase {
+  public byte b;
+}
diff --git a/test/140-field-packing/src/Main.java b/test/140-field-packing/src/Main.java
new file mode 100644
index 0000000..2810b32
--- /dev/null
+++ b/test/140-field-packing/src/Main.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println("running test...");
+    GapOrder.Check();
+    System.out.println("test completed.");
+  }
+}
diff --git a/test/463-checker-boolean-simplifier/src/Main.java b/test/463-checker-boolean-simplifier/src/Main.java
index 0b75930..dd17e77 100644
--- a/test/463-checker-boolean-simplifier/src/Main.java
+++ b/test/463-checker-boolean-simplifier/src/Main.java
@@ -119,9 +119,6 @@
   /// CHECK-DAG:     <<Cond:z\d+>>     LessThan [<<ParamX>>,<<ParamY>>]
   /// CHECK-DAG:                       Return [<<Cond>>]
 
-  /// CHECK-START: boolean Main.LessThan(int, int) boolean_simplifier (after)
-  /// CHECK-NOT:                       GreaterThanOrEqual
-
   public static boolean LessThan(int x, int y) {
     return (x < y) ? true : false;
   }
diff --git a/test/510-checker-try-catch/smali/SsaBuilder.smali b/test/510-checker-try-catch/smali/SsaBuilder.smali
new file mode 100644
index 0000000..2ddcbce
--- /dev/null
+++ b/test/510-checker-try-catch/smali/SsaBuilder.smali
@@ -0,0 +1,199 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LSsaBuilder;
+
+.super Ljava/lang/Object;
+
+# Tests that catch blocks with both normal and exceptional predecessors are
+# split in two.
+
+## CHECK-START: int SsaBuilder.testSimplifyCatchBlock(int, int, int) ssa_builder (after)
+
+## CHECK:      name             "B0"
+## CHECK-NEXT: from_bci
+## CHECK-NEXT: to_bci
+## CHECK-NEXT: predecessors
+## CHECK-NEXT: successors       "<<BExtracted:B\d+>>"
+
+## CHECK:      name             "<<BCatch:B\d+>>"
+## CHECK-NEXT: from_bci
+## CHECK-NEXT: to_bci
+## CHECK-NEXT: predecessors
+## CHECK-NEXT: successors       "<<BExtracted>>"
+## CHECK-NEXT: xhandlers
+## CHECK-NEXT: flags            "catch_block"
+## CHECK-NOT:  Add
+
+## CHECK:      name             "<<BExtracted>>"
+## CHECK-NEXT: from_bci
+## CHECK-NEXT: to_bci
+## CHECK-NEXT: predecessors     "B0" "<<BCatch>>"
+## CHECK-NOT:  flags            "catch_block"
+## CHECK:      Add
+
+.method public static testSimplifyCatchBlock(III)I
+    .registers 4
+
+    :catch_all
+    add-int/2addr p0, p1
+
+    :try_start
+    div-int/2addr p0, p2
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    return p0
+.end method
+
+# Should be rejected because :catch_all is a loop header.
+
+## CHECK-START: int SsaBuilder.testCatchLoopHeader(int, int, int) ssa_builder (after, bad_state)
+
+.method public static testCatchLoopHeader(III)I
+    .registers 4
+
+    :try_start_1
+    div-int/2addr p0, p1
+    return p0
+    :try_end_1
+    .catchall {:try_start_1 .. :try_end_1} :catch_all
+
+    :catch_all
+    :try_start_2
+    div-int/2addr p0, p2
+    return p0
+    :try_end_2
+    .catchall {:try_start_2 .. :try_end_2} :catch_all
+
+.end method
+
+# Tests creation of catch Phis.
+
+## CHECK-START: int SsaBuilder.testPhiCreation(int, int, int) ssa_builder (after)
+## CHECK-DAG:     <<P0:i\d+>>   ParameterValue
+## CHECK-DAG:     <<P1:i\d+>>   ParameterValue
+## CHECK-DAG:     <<P2:i\d+>>   ParameterValue
+
+## CHECK-DAG:     <<DZC1:i\d+>> DivZeroCheck [<<P1>>]
+## CHECK-DAG:     <<Div1:i\d+>> Div [<<P0>>,<<DZC1>>]
+## CHECK-DAG:     <<DZC2:i\d+>> DivZeroCheck [<<P1>>]
+## CHECK-DAG:     <<Div2:i\d+>> Div [<<Div1>>,<<DZC2>>]
+## CHECK-DAG:     <<DZC3:i\d+>> DivZeroCheck [<<P1>>]
+## CHECK-DAG:     <<Div3:i\d+>> Div [<<Div2>>,<<DZC3>>]
+
+## CHECK-DAG:     <<Phi1:i\d+>> Phi [<<P0>>,<<P1>>,<<P2>>] reg:0 is_catch_phi:true
+## CHECK-DAG:     <<Phi2:i\d+>> Phi [<<Div3>>,<<Phi1>>]    reg:0 is_catch_phi:false
+## CHECK-DAG:                   Return [<<Phi2>>]
+
+.method public static testPhiCreation(III)I
+    .registers 4
+
+    :try_start
+    move v0, p0
+    div-int/2addr p0, p1
+
+    move v0, p1
+    div-int/2addr p0, p1
+
+    move v0, p2
+    div-int/2addr p0, p1
+
+    move v0, p0
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :return
+    return v0
+
+    :catch_all
+    goto :return
+.end method
+
+# Tests that phi elimination does not remove catch phis where the value does
+# not dominate the phi.
+
+## CHECK-START: int SsaBuilder.testPhiElimination(int, int) ssa_builder (after)
+## CHECK-DAG:     <<P0:i\d+>>   ParameterValue
+## CHECK-DAG:     <<P1:i\d+>>   ParameterValue
+## CHECK-DAG:     <<Cst5:i\d+>> IntConstant 5
+## CHECK-DAG:     <<Cst7:i\d+>> IntConstant 7
+
+## CHECK-DAG:     <<Add1:i\d+>> Add [<<Cst7>>,<<Cst7>>]
+## CHECK-DAG:     <<DZC:i\d+>>  DivZeroCheck [<<P1>>]
+## CHECK-DAG:     <<Div:i\d+>>  Div [<<P0>>,<<DZC>>]
+
+## CHECK-DAG:     <<Phi1:i\d+>> Phi [<<Add1>>] reg:1 is_catch_phi:true
+## CHECK-DAG:     <<Add2:i\d+>> Add [<<Cst5>>,<<Phi1>>]
+
+## CHECK-DAG:     <<Phi2:i\d+>> Phi [<<Cst5>>,<<Add2>>] reg:0 is_catch_phi:false
+## CHECK-DAG:                   Return [<<Phi2>>]
+
+.method public static testPhiElimination(II)I
+    .registers 4
+
+    :try_start
+    # The constant in entry block will dominate the vreg 0 catch phi.
+    const v0, 5
+
+    # Insert addition so that the value of vreg 1 does not dominate the phi.
+    const v1, 7
+    add-int/2addr v1, v1
+
+    div-int/2addr p0, p1
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :return
+    return v0
+
+    :catch_all
+    add-int/2addr v0, v1
+    goto :return
+.end method
+
+# Tests that dead catch blocks are removed.
+
+## CHECK-START: int SsaBuilder.testDeadCatchBlock(int, int, int) ssa_builder (before)
+## CHECK:                       Mul
+
+## CHECK-START: int SsaBuilder.testDeadCatchBlock(int, int, int) ssa_builder (after)
+## CHECK-DAG:     <<P0:i\d+>>   ParameterValue
+## CHECK-DAG:     <<P1:i\d+>>   ParameterValue
+## CHECK-DAG:     <<P2:i\d+>>   ParameterValue
+## CHECK-DAG:     <<Add1:i\d+>> Add [<<P0>>,<<P1>>]
+## CHECK-DAG:     <<Add2:i\d+>> Add [<<Add1>>,<<P2>>]
+## CHECK-DAG:                   Return [<<Add2>>]
+
+## CHECK-START: int SsaBuilder.testDeadCatchBlock(int, int, int) ssa_builder (after)
+## CHECK-NOT:                   flags "catch_block"
+## CHECK-NOT:                   Mul
+
+.method public static testDeadCatchBlock(III)I
+    .registers 4
+
+    :try_start
+    add-int/2addr p0, p1
+    add-int/2addr p0, p2
+    move v0, p0
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :return
+    return v0
+
+    :catch_all
+    mul-int/2addr v1, v1
+    goto :return
+.end method
diff --git a/test/524-boolean-simplifier-regression/expected.txt b/test/524-boolean-simplifier-regression/expected.txt
new file mode 100644
index 0000000..863339f
--- /dev/null
+++ b/test/524-boolean-simplifier-regression/expected.txt
@@ -0,0 +1 @@
+Passed
diff --git a/test/524-boolean-simplifier-regression/info.txt b/test/524-boolean-simplifier-regression/info.txt
new file mode 100644
index 0000000..b38d71c
--- /dev/null
+++ b/test/524-boolean-simplifier-regression/info.txt
@@ -0,0 +1 @@
+Regression test for optimizing boolean simplifier.
diff --git a/test/524-boolean-simplifier-regression/src/Main.java b/test/524-boolean-simplifier-regression/src/Main.java
new file mode 100644
index 0000000..a8830bb
--- /dev/null
+++ b/test/524-boolean-simplifier-regression/src/Main.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+    public static boolean test2() {
+        throw new NullPointerException();
+    }
+
+    public static boolean test1()  {
+        System.out.println("Passed");
+        try {
+            test2();
+        } catch (NullPointerException npe) {
+        }
+        return true;
+    }
+
+    public static void main(String[] args) {
+        boolean b = false;
+
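+        // Mixing short-circuit || with non-short-circuit & below is the
+        // boolean pattern this regression test exercises in the simplifier.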
+        b = (test1() || (b = b)) & b;
+    }
+}
diff --git a/test/525-arrays-and-fields/expected.txt b/test/525-arrays-and-fields/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/525-arrays-and-fields/expected.txt
diff --git a/test/525-arrays-and-fields/info.txt b/test/525-arrays-and-fields/info.txt
new file mode 100644
index 0000000..3e16abf
--- /dev/null
+++ b/test/525-arrays-and-fields/info.txt
@@ -0,0 +1 @@
+Test on (in)variant static and instance field and array references in loops.
diff --git a/test/525-arrays-and-fields/src/Main.java b/test/525-arrays-and-fields/src/Main.java
new file mode 100644
index 0000000..cb1e4af
--- /dev/null
+++ b/test/525-arrays-and-fields/src/Main.java
@@ -0,0 +1,803 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on (in)variant static and instance field and array references in loops.
+//
+public class Main {
+
+  private static Object anObject = new Object();
+  private static Object anotherObject = new Object();
+
+  //
+  // Static fields.
+  //
+
+  private static boolean sZ;
+  private static byte sB;
+  private static char sC;
+  private static short sS;
+  private static int sI;
+  private static long sJ;
+  private static float sF;
+  private static double sD;
+  private static Object sL;
+
+  //
+  // Static arrays.
+  //
+
+  private static boolean[] sArrZ;
+  private static byte[] sArrB;
+  private static char[] sArrC;
+  private static short[] sArrS;
+  private static int[] sArrI;
+  private static long[] sArrJ;
+  private static float[] sArrF;
+  private static double[] sArrD;
+  private static Object[] sArrL;
+
+  //
+  // Instance fields.
+  //
+
+  private boolean mZ;
+  private byte mB;
+  private char mC;
+  private short mS;
+  private int mI;
+  private long mJ;
+  private float mF;
+  private double mD;
+  private Object mL;
+
+  //
+  // Instance arrays.
+  //
+
+  private boolean[] mArrZ;
+  private byte[] mArrB;
+  private char[] mArrC;
+  private short[] mArrS;
+  private int[] mArrI;
+  private long[] mArrJ;
+  private float[] mArrF;
+  private double[] mArrD;
+  private Object[] mArrL;
+
+  //
+  // Loops on static arrays with invariant static field references.
+  //
+
+  private static void SInvLoopZ() {
+    for (int i = 0; i < sArrZ.length; i++) {
+      sArrZ[i] = sZ;
+    }
+  }
+
+  private static void SInvLoopB() {
+    for (int i = 0; i < sArrB.length; i++) {
+      sArrB[i] = sB;
+    }
+  }
+
+  private static void SInvLoopC() {
+    for (int i = 0; i < sArrC.length; i++) {
+      sArrC[i] = sC;
+    }
+  }
+
+  private static void SInvLoopS() {
+    for (int i = 0; i < sArrS.length; i++) {
+      sArrS[i] = sS;
+    }
+  }
+
+  private static void SInvLoopI() {
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = sI;
+    }
+  }
+
+  private static void SInvLoopJ() {
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = sJ;
+    }
+  }
+
+  private static void SInvLoopF() {
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sF;
+    }
+  }
+
+  private static void SInvLoopD() {
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sD;
+    }
+  }
+
+  private static void SInvLoopL() {
+    for (int i = 0; i < sArrL.length; i++) {
+      sArrL[i] = sL;
+    }
+  }
+
+  //
+  // Loops on static arrays with variant static field references.
+  //
+
+  private static void SVarLoopZ() {
+    for (int i = 0; i < sArrZ.length; i++) {
+      sArrZ[i] = sZ;
+      if (i == 10)
+        sZ = !sZ;
+    }
+  }
+
+  private static void SVarLoopB() {
+    for (int i = 0; i < sArrB.length; i++) {
+      sArrB[i] = sB;
+      if (i == 10)
+        sB++;
+    }
+  }
+
+  private static void SVarLoopC() {
+    for (int i = 0; i < sArrC.length; i++) {
+      sArrC[i] = sC;
+      if (i == 10)
+        sC++;
+    }
+  }
+
+  private static void SVarLoopS() {
+    for (int i = 0; i < sArrS.length; i++) {
+      sArrS[i] = sS;
+      if (i == 10)
+        sS++;
+    }
+  }
+
+  private static void SVarLoopI() {
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = sI;
+      if (i == 10)
+        sI++;
+    }
+  }
+
+  private static void SVarLoopJ() {
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = sJ;
+      if (i == 10)
+        sJ++;
+    }
+  }
+
+  private static void SVarLoopF() {
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sF;
+      if (i == 10)
+        sF++;
+    }
+  }
+
+  private static void SVarLoopD() {
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sD;
+      if (i == 10)
+        sD++;
+    }
+  }
+
+  private static void SVarLoopL() {
+    for (int i = 0; i < sArrL.length; i++) {
+      sArrL[i] = sL;
+      if (i == 10)
+        sL = anotherObject;
+    }
+  }
+
+  //
+  // Loops on static arrays with a cross-over reference.
+  //
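+  // Element 20 is re-read on every iteration and overwritten once i reaches
+  // 20, so the load is not loop-invariant: values written before and after
+  // that iteration differ.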
+
+  private static void SCrossOverLoopZ() {
+    for (int i = 0; i < sArrZ.length; i++) {
+      sArrZ[i] = !sArrZ[20];
+    }
+  }
+
+  private static void SCrossOverLoopB() {
+    for (int i = 0; i < sArrB.length; i++) {
+      sArrB[i] = (byte)(sArrB[20] + 2);
+    }
+  }
+
+  private static void SCrossOverLoopC() {
+    for (int i = 0; i < sArrC.length; i++) {
+      sArrC[i] = (char)(sArrC[20] + 2);
+    }
+  }
+
+  private static void SCrossOverLoopS() {
+    for (int i = 0; i < sArrS.length; i++) {
+      sArrS[i] = (short)(sArrS[20] + 2);
+    }
+  }
+
+  private static void SCrossOverLoopI() {
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = sArrI[20] + 2;
+    }
+  }
+
+  private static void SCrossOverLoopJ() {
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = sArrJ[20] + 2;
+    }
+  }
+
+  private static void SCrossOverLoopF() {
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sArrF[20] + 2;
+    }
+  }
+
+  private static void SCrossOverLoopD() {
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sArrD[20] + 2;
+    }
+  }
+
+  private static void SCrossOverLoopL() {
+    for (int i = 0; i < sArrL.length; i++) {
+      sArrL[i] = (sArrL[20] == anObject) ? anotherObject : anObject;
+    }
+  }
+
+  //
+  // Loops on instance arrays with invariant instance field references.
+  //
+
+  private void InvLoopZ() {
+    for (int i = 0; i < mArrZ.length; i++) {
+      mArrZ[i] = mZ;
+    }
+  }
+
+  private void InvLoopB() {
+    for (int i = 0; i < mArrB.length; i++) {
+      mArrB[i] = mB;
+    }
+  }
+
+  private void InvLoopC() {
+    for (int i = 0; i < mArrC.length; i++) {
+      mArrC[i] = mC;
+    }
+  }
+
+  private void InvLoopS() {
+    for (int i = 0; i < mArrS.length; i++) {
+      mArrS[i] = mS;
+    }
+  }
+
+  private void InvLoopI() {
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = mI;
+    }
+  }
+
+  private void InvLoopJ() {
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = mJ;
+    }
+  }
+
+  private void InvLoopF() {
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mF;
+    }
+  }
+
+  private void InvLoopD() {
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mD;
+    }
+  }
+
+  private void InvLoopL() {
+    for (int i = 0; i < mArrL.length; i++) {
+      mArrL[i] = mL;
+    }
+  }
+
+  //
+  // Loops on instance arrays with variant instance field references.
+  //
+
+  private void VarLoopZ() {
+    for (int i = 0; i < mArrZ.length; i++) {
+      mArrZ[i] = mZ;
+      if (i == 10)
+        mZ = !mZ;
+    }
+  }
+
+  private void VarLoopB() {
+    for (int i = 0; i < mArrB.length; i++) {
+      mArrB[i] = mB;
+      if (i == 10)
+        mB++;
+    }
+  }
+
+  private void VarLoopC() {
+    for (int i = 0; i < mArrC.length; i++) {
+      mArrC[i] = mC;
+      if (i == 10)
+        mC++;
+    }
+  }
+
+  private void VarLoopS() {
+    for (int i = 0; i < mArrS.length; i++) {
+      mArrS[i] = mS;
+      if (i == 10)
+        mS++;
+    }
+  }
+
+  private void VarLoopI() {
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = mI;
+      if (i == 10)
+        mI++;
+    }
+  }
+
+  private void VarLoopJ() {
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = mJ;
+      if (i == 10)
+        mJ++;
+    }
+  }
+
+  private void VarLoopF() {
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mF;
+      if (i == 10)
+        mF++;
+    }
+  }
+
+  private void VarLoopD() {
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mD;
+      if (i == 10)
+        mD++;
+    }
+  }
+
+  private void VarLoopL() {
+    for (int i = 0; i < mArrL.length; i++) {
+      mArrL[i] = mL;
+      if (i == 10)
+        mL = anotherObject;
+    }
+  }
+
+  //
+  // Loops on instance arrays with a cross-over reference.
+  //
+
+  private void CrossOverLoopZ() {
+    for (int i = 0; i < mArrZ.length; i++) {
+      mArrZ[i] = !mArrZ[20];
+    }
+  }
+
+  private void CrossOverLoopB() {
+    for (int i = 0; i < mArrB.length; i++) {
+      mArrB[i] = (byte)(mArrB[20] + 2);
+    }
+  }
+
+  private void CrossOverLoopC() {
+    for (int i = 0; i < mArrC.length; i++) {
+      mArrC[i] = (char)(mArrC[20] + 2);
+    }
+  }
+
+  private void CrossOverLoopS() {
+    for (int i = 0; i < mArrS.length; i++) {
+      mArrS[i] = (short)(mArrS[20] + 2);
+    }
+  }
+
+  private void CrossOverLoopI() {
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = mArrI[20] + 2;
+    }
+  }
+
+  private void CrossOverLoopJ() {
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = mArrJ[20] + 2;
+    }
+  }
+
+  private void CrossOverLoopF() {
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mArrF[20] + 2;
+    }
+  }
+
+  private void CrossOverLoopD() {
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mArrD[20] + 2;
+    }
+  }
+
+  private void CrossOverLoopL() {
+    for (int i = 0; i < mArrL.length; i++) {
+      mArrL[i] = (mArrL[20] == anObject) ? anotherObject : anObject;
+    }
+  }
+
+  //
+  // Driver and testers.
+  //
+
+  public static void main(String[] args) {
+    DoStaticTests();
+    new Main().DoInstanceTests();
+  }
+
+  private static void DoStaticTests() {
+    // Type Z.
+    sZ = true;
+    sArrZ = new boolean[100];
+    SInvLoopZ();
+    for (int i = 0; i < sArrZ.length; i++) {
+      expectEquals(true, sArrZ[i]);
+    }
+    SVarLoopZ();
+    for (int i = 0; i < sArrZ.length; i++) {
+      expectEquals(i <= 10, sArrZ[i]);
+    }
+    SCrossOverLoopZ();
+    for (int i = 0; i < sArrZ.length; i++) {
+      expectEquals(i <= 20, sArrZ[i]);
+    }
+    // Type B.
+    sB = 1;
+    sArrB = new byte[100];
+    SInvLoopB();
+    for (int i = 0; i < sArrB.length; i++) {
+      expectEquals(1, sArrB[i]);
+    }
+    SVarLoopB();
+    for (int i = 0; i < sArrB.length; i++) {
+      expectEquals(i <= 10 ? 1 : 2, sArrB[i]);
+    }
+    SCrossOverLoopB();
+    for (int i = 0; i < sArrB.length; i++) {
+      expectEquals(i <= 20 ? 4 : 6, sArrB[i]);
+    }
+    // Type C.
+    sC = 2;
+    sArrC = new char[100];
+    SInvLoopC();
+    for (int i = 0; i < sArrC.length; i++) {
+      expectEquals(2, sArrC[i]);
+    }
+    SVarLoopC();
+    for (int i = 0; i < sArrC.length; i++) {
+      expectEquals(i <= 10 ? 2 : 3, sArrC[i]);
+    }
+    SCrossOverLoopC();
+    for (int i = 0; i < sArrC.length; i++) {
+      expectEquals(i <= 20 ? 5 : 7, sArrC[i]);
+    }
+    // Type S.
+    sS = 3;
+    sArrS = new short[100];
+    SInvLoopS();
+    for (int i = 0; i < sArrS.length; i++) {
+      expectEquals(3, sArrS[i]);
+    }
+    SVarLoopS();
+    for (int i = 0; i < sArrS.length; i++) {
+      expectEquals(i <= 10 ? 3 : 4, sArrS[i]);
+    }
+    SCrossOverLoopS();
+    for (int i = 0; i < sArrS.length; i++) {
+      expectEquals(i <= 20 ? 6 : 8, sArrS[i]);
+    }
+    // Type I.
+    sI = 4;
+    sArrI = new int[100];
+    SInvLoopI();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(4, sArrI[i]);
+    }
+    SVarLoopI();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(i <= 10 ? 4 : 5, sArrI[i]);
+    }
+    SCrossOverLoopI();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(i <= 20 ? 7 : 9, sArrI[i]);
+    }
+    // Type J.
+    sJ = 5;
+    sArrJ = new long[100];
+    SInvLoopJ();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(5, sArrJ[i]);
+    }
+    SVarLoopJ();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(i <= 10 ? 5 : 6, sArrJ[i]);
+    }
+    SCrossOverLoopJ();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(i <= 20 ? 8 : 10, sArrJ[i]);
+    }
+    // Type F.
+    sF = 6.0f;
+    sArrF = new float[100];
+    SInvLoopF();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(6, sArrF[i]);
+    }
+    SVarLoopF();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(i <= 10 ? 6 : 7, sArrF[i]);
+    }
+    SCrossOverLoopF();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(i <= 20 ? 9 : 11, sArrF[i]);
+    }
+    // Type D.
+    sD = 7.0;
+    sArrD = new double[100];
+    SInvLoopD();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(7.0, sArrD[i]);
+    }
+    SVarLoopD();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(i <= 10 ? 7 : 8, sArrD[i]);
+    }
+    SCrossOverLoopD();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(i <= 20 ? 10 : 12, sArrD[i]);
+    }
+    // Type L.
+    sL = anObject;
+    sArrL = new Object[100];
+    SInvLoopL();
+    for (int i = 0; i < sArrL.length; i++) {
+      expectEquals(anObject, sArrL[i]);
+    }
+    SVarLoopL();
+    for (int i = 0; i < sArrL.length; i++) {
+      expectEquals(i <= 10 ? anObject : anotherObject, sArrL[i]);
+    }
+    SCrossOverLoopL();
+    for (int i = 0; i < sArrL.length; i++) {
+      expectEquals(i <= 20 ? anObject : anotherObject, sArrL[i]);
+    }
+  }
+
+  private void DoInstanceTests() {
+    // Type Z.
+    mZ = true;
+    mArrZ = new boolean[100];
+    InvLoopZ();
+    for (int i = 0; i < mArrZ.length; i++) {
+      expectEquals(true, mArrZ[i]);
+    }
+    VarLoopZ();
+    for (int i = 0; i < mArrZ.length; i++) {
+      expectEquals(i <= 10, mArrZ[i]);
+    }
+    CrossOverLoopZ();
+    for (int i = 0; i < mArrZ.length; i++) {
+      expectEquals(i <= 20, mArrZ[i]);
+    }
+    // Type B.
+    mB = 1;
+    mArrB = new byte[100];
+    InvLoopB();
+    for (int i = 0; i < mArrB.length; i++) {
+      expectEquals(1, mArrB[i]);
+    }
+    VarLoopB();
+    for (int i = 0; i < mArrB.length; i++) {
+      expectEquals(i <= 10 ? 1 : 2, mArrB[i]);
+    }
+    CrossOverLoopB();
+    for (int i = 0; i < mArrB.length; i++) {
+      expectEquals(i <= 20 ? 4 : 6, mArrB[i]);
+    }
+    // Type C.
+    mC = 2;
+    mArrC = new char[100];
+    InvLoopC();
+    for (int i = 0; i < mArrC.length; i++) {
+      expectEquals(2, mArrC[i]);
+    }
+    VarLoopC();
+    for (int i = 0; i < mArrC.length; i++) {
+      expectEquals(i <= 10 ? 2 : 3, mArrC[i]);
+    }
+    CrossOverLoopC();
+    for (int i = 0; i < mArrC.length; i++) {
+      expectEquals(i <= 20 ? 5 : 7, mArrC[i]);
+    }
+    // Type S.
+    mS = 3;
+    mArrS = new short[100];
+    InvLoopS();
+    for (int i = 0; i < mArrS.length; i++) {
+      expectEquals(3, mArrS[i]);
+    }
+    VarLoopS();
+    for (int i = 0; i < mArrS.length; i++) {
+      expectEquals(i <= 10 ? 3 : 4, mArrS[i]);
+    }
+    CrossOverLoopS();
+    for (int i = 0; i < mArrS.length; i++) {
+      expectEquals(i <= 20 ? 6 : 8, mArrS[i]);
+    }
+    // Type I.
+    mI = 4;
+    mArrI = new int[100];
+    InvLoopI();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(4, mArrI[i]);
+    }
+    VarLoopI();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(i <= 10 ? 4 : 5, mArrI[i]);
+    }
+    CrossOverLoopI();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(i <= 20 ? 7 : 9, mArrI[i]);
+    }
+    // Type J.
+    mJ = 5;
+    mArrJ = new long[100];
+    InvLoopJ();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(5, mArrJ[i]);
+    }
+    VarLoopJ();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(i <= 10 ? 5 : 6, mArrJ[i]);
+    }
+    CrossOverLoopJ();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(i <= 20 ? 8 : 10, mArrJ[i]);
+    }
+    // Type F.
+    mF = 6.0f;
+    mArrF = new float[100];
+    InvLoopF();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(6, mArrF[i]);
+    }
+    VarLoopF();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(i <= 10 ? 6 : 7, mArrF[i]);
+    }
+    CrossOverLoopF();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(i <= 20 ? 9 : 11, mArrF[i]);
+    }
+    // Type D.
+    mD = 7.0;
+    mArrD = new double[100];
+    InvLoopD();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(7.0, mArrD[i]);
+    }
+    VarLoopD();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(i <= 10 ? 7 : 8, mArrD[i]);
+    }
+    CrossOverLoopD();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(i <= 20 ? 10 : 12, mArrD[i]);
+    }
+    // Type L.
+    mL = anObject;
+    mArrL = new Object[100];
+    InvLoopL();
+    for (int i = 0; i < mArrL.length; i++) {
+      expectEquals(anObject, mArrL[i]);
+    }
+    VarLoopL();
+    for (int i = 0; i < mArrL.length; i++) {
+      expectEquals(i <= 10 ? anObject : anotherObject, mArrL[i]);
+    }
+    CrossOverLoopL();
+    for (int i = 0; i < mArrL.length; i++) {
+      expectEquals(i <= 20 ? anObject : anotherObject, mArrL[i]);
+    }
+  }
+
+  private static void expectEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(Object expected, Object result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 4c17240..fd9fcaf 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -31,4 +31,9 @@
 b/22331663
 b/22331663 (pass)
 b/22331663 (fail)
+b/22411633 (1)
+b/22411633 (2)
+b/22411633 (3)
+b/22411633 (4)
+b/22411633 (5)
 Done!
diff --git a/test/800-smali/smali/b_22331663.smali b/test/800-smali/smali/b_22331663.smali
index 057fc7f..bae75c2 100644
--- a/test/800-smali/smali/b_22331663.smali
+++ b/test/800-smali/smali/b_22331663.smali
@@ -4,29 +4,29 @@
 
 .method public static run(Z)V
 .registers 6
-       # Make v4 defined, just use null.
-       const v4, 0
+       if-eqz v5, :if_eqz_target
 
-       if-eqz v5, :Label2
-
-:Label1
        # Construct a java.lang.Object completely, and throw a new exception.
        new-instance v4, Ljava/lang/Object;
        invoke-direct {v4}, Ljava/lang/Object;-><init>()V
 
        new-instance v3, Ljava/lang/RuntimeException;
        invoke-direct {v3}, Ljava/lang/RuntimeException;-><init>()V
+:throw1_begin
        throw v3
+:throw1_end
 
-:Label2
+:if_eqz_target
        # Allocate a java.lang.Object (do not initialize), and throw a new exception.
        new-instance v4, Ljava/lang/Object;
 
        new-instance v3, Ljava/lang/RuntimeException;
        invoke-direct {v3}, Ljava/lang/RuntimeException;-><init>()V
+:throw2_begin
        throw v3
+:throw2_end
 
-:Label3
+:catch_entry
        # Catch handler. Here we had to merge the uninitialized with the initialized reference,
        # which creates a conflict. Copy the conflict, and then return. This should not make the
        # verifier fail the method.
@@ -34,5 +34,6 @@
 
        return-void
 
-.catchall {:Label1 .. :Label3} :Label3
+.catchall {:throw1_begin .. :throw1_end} :catch_entry
+.catchall {:throw2_begin .. :throw2_end} :catch_entry
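+# Each range now covers only its throw, so the catch handler itself is no
+# longer part of any protected region.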
 .end method
diff --git a/test/800-smali/smali/b_22411633_1.smali b/test/800-smali/smali/b_22411633_1.smali
new file mode 100644
index 0000000..ffc82a8
--- /dev/null
+++ b/test/800-smali/smali/b_22411633_1.smali
@@ -0,0 +1,35 @@
+.class public LB22411633_1;
+.super Ljava/lang/Object;
+
+
+.method public static run(Z)V
+.registers 6
+       # Make v3 & v4 defined, just use null.
+       const v3, 0
+       const v4, 0
+
+       # Allocate a java.lang.Object (do not initialize).
+       new-instance v4, Ljava/lang/Object;
+
+       # Branch forward.
+       if-eqz v5, :LabelMerge
+
+       # Just some random work.
+       add-int/lit16 v3, v3, 1
+
+       # Another branch forward.
+       if-nez v5, :LabelMerge
+
+       # Some more random work, technically dead, but reachable.
+       add-int/lit16 v3, v3, 1
+
+:LabelMerge
+       # v4 is still an uninitialized reference here. Initialize it.
+       invoke-direct {v4}, Ljava/lang/Object;-><init>()V
+
+       # And test whether it's initialized by calling hashCode.
+       invoke-virtual {v4}, Ljava/lang/Object;->hashCode()I
+
+       return-void
+
+.end method
diff --git a/test/800-smali/smali/b_22411633_2.smali b/test/800-smali/smali/b_22411633_2.smali
new file mode 100644
index 0000000..9f27c4c
--- /dev/null
+++ b/test/800-smali/smali/b_22411633_2.smali
@@ -0,0 +1,45 @@
+.class public LB22411633_2;
+.super Ljava/lang/Object;
+
+
+.method public static run(Z)V
+.registers 6
+       # Make v3 & v4 defined, just use null.
+       const v3, 0
+       const v4, 0
+
+       # Allocate a java.lang.Object (do not initialize).
+       new-instance v4, Ljava/lang/Object;
+
+       # Branch forward.
+       if-eqz v5, :LabelMerge
+
+       # Create a non-precise object reference. We can do this by merging two objects together
+       # that only have Object as a common ancestor.
+
+       # Allocate a java.lang.Object and initialize it.
+       new-instance v4, Ljava/lang/Object;
+       invoke-direct {v4}, Ljava/lang/Object;-><init>()V
+
+       if-nez v5, :LabelMergeObject
+
+       new-instance v4, Ljava/lang/Integer;
+       invoke-direct {v4}, Ljava/lang/Integer;-><init>()V
+
+:LabelMergeObject
+
+       # Dummy work to separate blocks. At this point, v4 is of type Reference<Object>.
+       add-int/lit16 v3, v3, 1
+
+:LabelMerge
+       # Merge the uninitialized Object from line 12 with the reference to Object from line 31.
+       # Older rules set any reference merged with Object to Object. This is wrong when the
+       # other reference is an uninitialized reference, as we'd suddenly allow calling methods
+       # on it.
+
+       # Test whether it's some initialized reference by calling hashCode. This should fail, as we
+       # merged initialized and uninitialized.
+       invoke-virtual {v4}, Ljava/lang/Object;->hashCode()I
+
+       return-void
+
+.end method
diff --git a/test/800-smali/smali/b_22411633_3.smali b/test/800-smali/smali/b_22411633_3.smali
new file mode 100644
index 0000000..d1212f1
--- /dev/null
+++ b/test/800-smali/smali/b_22411633_3.smali
@@ -0,0 +1,31 @@
+.class public LB22411633_3;
+.super Ljava/lang/Object;
+
+
+.method public static run(Z)V
+.registers 6
+       # Make v3 & v4 defined, just use null.
+       const v3, 0
+       const v4, 0
+
+       # Allocate a java.lang.Object (do not initialize).
+       new-instance v4, Ljava/lang/Object;
+
+       # Branch forward.
+       if-eqz v5, :LabelMerge
+
+       # Create an initialized Object.
+       new-instance v4, Ljava/lang/Object;
+       invoke-direct {v4}, Ljava/lang/Object;-><init>()V
+
+       # Just some random work.
+       add-int/lit16 v3, v3, 1
+
+:LabelMerge
+       # At this point, an initialized and an uninitialized reference are merged. However, the
+       # merge is only from forward branches. If the conflict isn't used (as here), this should
+       # pass the verifier.
+
+       return-void
+
+.end method
diff --git a/test/800-smali/smali/b_22411633_4.smali b/test/800-smali/smali/b_22411633_4.smali
new file mode 100644
index 0000000..503ca99
--- /dev/null
+++ b/test/800-smali/smali/b_22411633_4.smali
@@ -0,0 +1,25 @@
+.class public LB22411633_4;
+.super Ljava/lang/Object;
+
+
+.method public static run(Z)V
+.registers 6
+       # Do not merge into the backward branch target.
+       goto :LabelEntry
+
+:LabelBwd
+       # At this point v4 is an uninitialized reference. This should fail to verify.
+       # Note: we make sure that it is an uninitialized reference and not a conflict in sister
+       #       file b_22411633_bwdok.smali.
+       invoke-virtual {v4}, Ljava/lang/Object;->hashCode()I
+
+:LabelEntry
+       # Allocate a java.lang.Object (do not initialize).
+       new-instance v4, Ljava/lang/Object;
+
+       # Branch backward.
+       if-eqz v5, :LabelBwd
+
+       return-void
+
+.end method
diff --git a/test/800-smali/smali/b_22411633_5.smali b/test/800-smali/smali/b_22411633_5.smali
new file mode 100644
index 0000000..b7964f6
--- /dev/null
+++ b/test/800-smali/smali/b_22411633_5.smali
@@ -0,0 +1,28 @@
+.class public LB22411633_5;
+.super Ljava/lang/Object;
+
+
+.method public static run(Z)V
+.registers 6
+       # Do not merge into the backward branch target.
+       goto :LabelEntry
+
+:LabelBwd
+       # At this point v4 is an uninitialized reference. We should be able to initialize it
+       # here and call a method afterwards.
+       invoke-direct {v4}, Ljava/lang/Object;-><init>()V
+       invoke-virtual {v4}, Ljava/lang/Object;->hashCode()I
+
+       # Make sure this is not an infinite loop.
+       const v5, 1
+
+:LabelEntry
+       # Allocate a java.lang.Object (do not initialize).
+       new-instance v4, Ljava/lang/Object;
+
+       # Branch backward.
+       if-eqz v5, :LabelBwd
+
+       return-void
+
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index 8be6418..8da2af4 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -109,6 +109,16 @@
                 new Object[] { false }, null, null));
         testCases.add(new TestCase("b/22331663 (fail)", "B22331663Fail", "run",
                 new Object[] { false }, new VerifyError(), null));
+        testCases.add(new TestCase("b/22411633 (1)", "B22411633_1", "run", new Object[] { false },
+                null, null));
+        testCases.add(new TestCase("b/22411633 (2)", "B22411633_2", "run", new Object[] { false },
+                new VerifyError(), null));
+        testCases.add(new TestCase("b/22411633 (3)", "B22411633_3", "run", new Object[] { false },
+                null, null));
+        testCases.add(new TestCase("b/22411633 (4)", "B22411633_4", "run", new Object[] { false },
+                new VerifyError(), null));
+        testCases.add(new TestCase("b/22411633 (5)", "B22411633_5", "run", new Object[] { false },
+                null, null));
     }
 
     public void runTests() {
diff --git a/test/955-lambda-smali/expected.txt b/test/955-lambda-smali/expected.txt
index 0a5b5fd..3637099 100644
--- a/test/955-lambda-smali/expected.txt
+++ b/test/955-lambda-smali/expected.txt
@@ -3,6 +3,16 @@
 ABCD Hello world! (4-args, no closure)
 Caught NPE
 (BoxUnbox) Hello boxing world! (0-args, no closure)
+(BoxUnbox) Boxing repeatedly yields referentially-equal objects
 (BoxUnbox) Caught NPE for unbox-lambda
 (BoxUnbox) Caught NPE for box-lambda
 (BoxUnbox) Caught ClassCastException for unbox-lambda
+(MoveResult) testZ success
+(MoveResult) testB success
+(MoveResult) testS success
+(MoveResult) testI success
+(MoveResult) testC success
+(MoveResult) testJ success
+(MoveResult) testF success
+(MoveResult) testD success
+(MoveResult) testL success
diff --git a/test/955-lambda-smali/smali/BoxUnbox.smali b/test/955-lambda-smali/smali/BoxUnbox.smali
index 5e66733..108b5fa 100644
--- a/test/955-lambda-smali/smali/BoxUnbox.smali
+++ b/test/955-lambda-smali/smali/BoxUnbox.smali
@@ -23,15 +23,14 @@
 .end method
 
 .method public static run()V
-.registers 2
-    # Trivial 0-arg hello world
-    create-lambda v0, LBoxUnbox;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
+    .registers 0
 
+    invoke-static {}, LBoxUnbox;->testBox()V
+    invoke-static {}, LBoxUnbox;->testBoxEquality()V
     invoke-static {}, LBoxUnbox;->testFailures()V
     invoke-static {}, LBoxUnbox;->testFailures2()V
     invoke-static {}, LBoxUnbox;->testFailures3()V
+    invoke-static {}, LBoxUnbox;->forceGC()V
 
     return-void
 .end method
@@ -48,6 +47,47 @@
     return-void
 .end method
 
+# Test boxing and unboxing; the same lambda should be invoked as if there was no box.
+.method private static testBox()V
+    .registers 3
+
+    create-lambda v0, LBoxUnbox;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
+    box-lambda v2, v0 # v2 = box(v0)
+    unbox-lambda v0, v2, Ljava/lang/reflect/ArtMethod; # v0 = unbox(v2)
+    invoke-lambda v0, {}
+
+    return-void
+.end method
+
+# Test that boxing the same lambda twice yields the same object.
+.method private static testBoxEquality()V
+    .registers 6 # 0 parameters, 6 locals
+
+    create-lambda v0, LBoxUnbox;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
+    box-lambda v2, v0 # v2 = box(v0)
+    box-lambda v3, v0 # v3 = box(v0)
+
+    # The objects should be non-null, and they should be the same reference.
+    if-eqz v2, :is_zero
+    if-ne v2, v3, :is_not_equal
+
+    const-string v4, "(BoxUnbox) Boxing repeatedly yields referentially-equal objects"
+    goto :end
+
+:is_zero
+    const-string v4, "(BoxUnbox) Boxing repeatedly FAILED: boxing returned null"
+    goto :end
+
+:is_not_equal
+    const-string v4, "(BoxUnbox) Boxing repeatedly FAILED: objects were not the same reference"
+    goto :end
+
+:end
+    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    return-void
+.end method
+
 # Test that exceptions are thrown as expected when opcodes are used incorrectly
 .method private static testFailures()V
     .registers 4 # 0 parameters, 4 locals
@@ -116,3 +156,14 @@
 
     .catch Ljava/lang/ClassCastException; {:start .. :end} :handler
 .end method
+
+
+# Force a GC. Used to ensure our weak reference table of boxed lambdas is getting swept.
+.method private static forceGC()V
+    .registers 1
+    invoke-static {}, Ljava/lang/Runtime;->getRuntime()Ljava/lang/Runtime;
+    move-result-object v0
+    invoke-virtual {v0}, Ljava/lang/Runtime;->gc()V
+
+    return-void
+.end method
diff --git a/test/955-lambda-smali/smali/Main.smali b/test/955-lambda-smali/smali/Main.smali
index 92afd79..5d2aabb 100644
--- a/test/955-lambda-smali/smali/Main.smali
+++ b/test/955-lambda-smali/smali/Main.smali
@@ -23,6 +23,7 @@
     invoke-static {}, LSanityCheck;->run()I
     invoke-static {}, LTrivialHelloWorld;->run()V
     invoke-static {}, LBoxUnbox;->run()V
+    invoke-static {}, LMoveResult;->run()V
 
 # TODO: add tests when verification fails
 
diff --git a/test/955-lambda-smali/smali/MoveResult.smali b/test/955-lambda-smali/smali/MoveResult.smali
new file mode 100644
index 0000000..1725da3
--- /dev/null
+++ b/test/955-lambda-smali/smali/MoveResult.smali
@@ -0,0 +1,330 @@
+#
+#  Copyright (C) 2015 The Android Open Source Project
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+.class public LMoveResult;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static run()V
+.registers 8
+    invoke-static {}, LMoveResult;->testZ()V
+    invoke-static {}, LMoveResult;->testB()V
+    invoke-static {}, LMoveResult;->testS()V
+    invoke-static {}, LMoveResult;->testI()V
+    invoke-static {}, LMoveResult;->testC()V
+    invoke-static {}, LMoveResult;->testJ()V
+    invoke-static {}, LMoveResult;->testF()V
+    invoke-static {}, LMoveResult;->testD()V
+    invoke-static {}, LMoveResult;->testL()V
+
+    return-void
+.end method
+
+# Test that booleans are returned correctly via move-result.
+.method public static testZ()V
+    .registers 6
+
+    create-lambda v0, LMoveResult;->lambdaZ(Ljava/lang/reflect/ArtMethod;)Z
+    invoke-lambda v0, {}
+    move-result v2
+    const v3, 1
+
+    if-ne v3, v2, :is_not_equal
+    const-string v4, "(MoveResult) testZ success"
+    goto :end
+
+:is_not_equal
+    const-string v4, "(MoveResult) testZ failed"
+
+:end
+    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    return-void
+
+.end method
+
+# Lambda target for testZ. Always returns "true".
+.method public static lambdaZ(Ljava/lang/reflect/ArtMethod;)Z
+    .registers 3
+
+    const v0, 1
+    return v0
+
+.end method
+
+# Test that bytes are returned correctly via move-result.
+.method public static testB()V
+    .registers 6
+
+    create-lambda v0, LMoveResult;->lambdaB(Ljava/lang/reflect/ArtMethod;)B
+    invoke-lambda v0, {}
+    move-result v2
+    const v3, 15
+
+    if-ne v3, v2, :is_not_equal
+    const-string v4, "(MoveResult) testB success"
+    goto :end
+
+:is_not_equal
+    const-string v4, "(MoveResult) testB failed"
+
+:end
+    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    return-void
+
+.end method
+
+# Lambda target for testB. Always returns "15".
+.method public static lambdaB(Ljava/lang/reflect/ArtMethod;)B
+    .registers 3 # 1 parameter, 2 locals
+
+    const v0, 15
+    return v0
+
+.end method
+
+# Test that shorts are returned correctly via move-result.
+.method public static testS()V
+    .registers 6
+
+    create-lambda v0, LMoveResult;->lambdaS(Ljava/lang/reflect/ArtMethod;)S
+    invoke-lambda v0, {}
+    move-result v2
+    const/16 v3, 31000
+
+    if-ne v3, v2, :is_not_equal
+    const-string v4, "(MoveResult) testS success"
+    goto :end
+
+:is_not_equal
+    const-string v4, "(MoveResult) testS failed"
+
+:end
+    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    return-void
+
+.end method
+
+# Lambda target for testS. Always returns "31000".
+.method public static lambdaS(Ljava/lang/reflect/ArtMethod;)S
+    .registers 3
+
+    const/16 v0, 31000
+    return v0
+
+.end method
+
+# Test that ints are returned correctly via move-result.
+.method public static testI()V
+    .registers 6
+
+    create-lambda v0, LMoveResult;->lambdaI(Ljava/lang/reflect/ArtMethod;)I
+    invoke-lambda v0, {}
+    move-result v2
+    const v3, 128000
+
+    if-ne v3, v2, :is_not_equal
+    const-string v4, "(MoveResult) testI success"
+    goto :end
+
+:is_not_equal
+    const-string v4, "(MoveResult) testI failed"
+
+:end
+    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    return-void
+
+.end method
+
+# Lambda target for testI. Always returns "128000".
+.method public static lambdaI(Ljava/lang/reflect/ArtMethod;)I
+    .registers 3
+
+    const v0, 128000
+    return v0
+
+.end method
+
+# Test that chars are returned correctly via move-result.
+.method public static testC()V
+    .registers 6
+
+    create-lambda v0, LMoveResult;->lambdaC(Ljava/lang/reflect/ArtMethod;)C
+    invoke-lambda v0, {}
+    move-result v2
+    const v3, 65535
+
+    if-ne v3, v2, :is_not_equal
+    const-string v4, "(MoveResult) testC success"
+    goto :end
+
+:is_not_equal
+    const-string v4, "(MoveResult) testC failed"
+
+:end
+    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    return-void
+
+.end method
+
+# Lambda target for testC. Always returns "65535".
+.method public static lambdaC(Ljava/lang/reflect/ArtMethod;)C
+    .registers 3
+
+    const v0, 65535
+    return v0
+
+.end method
+
+# Test that longs are returned correctly via move-result.
+.method public static testJ()V
+    .registers 8
+
+    create-lambda v0, LMoveResult;->lambdaJ(Ljava/lang/reflect/ArtMethod;)J
+    invoke-lambda v0, {}
+    move-result v2
+    const-wide v4, 0xdeadf00dc0ffee
+
+    if-ne v4, v2, :is_not_equal
+    const-string v6, "(MoveResult) testJ success"
+    goto :end
+
+:is_not_equal
+    const-string v6, "(MoveResult) testJ failed"
+
+:end
+    sget-object v7, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v7, v6}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    return-void
+
+.end method
+
+# Lambda target for testJ. Always returns "0xdeadf00dc0ffee".
+.method public static lambdaJ(Ljava/lang/reflect/ArtMethod;)J
+    .registers 4
+
+    const-wide v0, 0xdeadf00dc0ffee
+    return-wide v0
+
+.end method
+
+# Test that floats are returned correctly via move-result.
+.method public static testF()V
+    .registers 6
+
+    create-lambda v0, LMoveResult;->lambdaF(Ljava/lang/reflect/ArtMethod;)F
+    invoke-lambda v0, {}
+    move-result v2
+    const v3, infinityf
+
+    if-ne v3, v2, :is_not_equal
+    const-string v4, "(MoveResult) testF success"
+    goto :end
+
+:is_not_equal
+    const-string v4, "(MoveResult) testF failed"
+
+:end
+    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    return-void
+
+.end method
+
+# Lambda target for testF. Always returns "infinityf".
+.method public static lambdaF(Ljava/lang/reflect/ArtMethod;)F
+    .registers 3
+
+    const v0, infinityf
+    return v0
+
+.end method
+
+# Test that doubles are returned correctly via move-result.
+.method public static testD()V
+    .registers 8
+
+    create-lambda v0, LMoveResult;->lambdaD(Ljava/lang/reflect/ArtMethod;)D
+    invoke-lambda v0, {}
+    move-result-wide v2
+    const-wide v4, infinity
+
+    if-ne v4, v2, :is_not_equal
+    const-string v6, "(MoveResult) testD success"
+    goto :end
+
+:is_not_equal
+    const-string v6, "(MoveResult) testD failed"
+
+:end
+    sget-object v7, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v7, v6}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    return-void
+
+.end method
+
+# Lambda target for testD. Always returns "infinity".
+.method public static lambdaD(Ljava/lang/reflect/ArtMethod;)D
+    .registers 4
+
+    const-wide v0, infinity
+    return-wide v0
+
+.end method
+
+
+# Test that objects are returned correctly via move-result.
+.method public static testL()V
+    .registers 8
+
+    create-lambda v0, LMoveResult;->lambdaL(Ljava/lang/reflect/ArtMethod;)Ljava/lang/String;
+    invoke-lambda v0, {}
+    move-result-object v2
+    const-string v4, "Interned string"
+
+    # relies on string interning returning identical object references
+    if-ne v4, v2, :is_not_equal
+    const-string v6, "(MoveResult) testL success"
+    goto :end
+
+:is_not_equal
+    const-string v6, "(MoveResult) testL failed"
+
+:end
+    sget-object v7, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v7, v6}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    return-void
+
+.end method
+
+# Lambda target for testL. Always returns "Interned string" (string).
+.method public static lambdaL(Ljava/lang/reflect/ArtMethod;)Ljava/lang/String;
+    .registers 4
+
+    const-string v0, "Interned string"
+    return-object v0
+
+.end method
+
+
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 38973f7..3d5c483 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -46,6 +46,10 @@
     $(JILL_JAR)
 endif
 
+ifeq ($(ART_TEST_DEBUG_GC),true)
+  ART_TEST_WITH_STRACE := true
+endif
+
 # Helper to create individual build targets for tests. Must be called with $(eval).
 # $(1): the test number
 define define-build-art-run-test
@@ -108,6 +112,9 @@
 ifeq ($(ART_TEST_DEFAULT_COMPILER),true)
   COMPILER_TYPES += default
 endif
+ifeq ($(ART_TEST_INTERPRETER_ACCESS_CHECKS),true)
+  COMPILER_TYPES += interpreter-access-checks
+endif
 ifeq ($(ART_TEST_INTERPRETER),true)
   COMPILER_TYPES += interpreter
 endif
@@ -194,7 +201,7 @@
 # To generate a full list of tests:
 # $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES),$(COMPILER_TYPES), \
 #        $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
-#        $(DEBUGGABLE_TYPES) $(TEST_ART_RUN_TESTS), $(ALL_ADDRESS_SIZES)
+#        $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 
 # Converts a rule name to the form used in variables, e.g. no-relocate to NO_RELOCATE
 define name-to-var
@@ -256,6 +263,28 @@
 
 TEST_ART_BROKEN_NO_RELOCATE_TESTS :=
 
+# Temporarily disable some broken tests when forcing access checks in interpreter b/22414682
+TEST_ART_BROKEN_INTERPRETER_ACCESS_CHECK_TESTS := \
+  004-JniTest \
+  005-annotations \
+  044-proxy \
+  073-mismatched-field \
+  088-monitor-verification \
+  135-MirandaDispatch \
+  137-cfi \
+  412-new-array \
+  471-uninitialized-locals \
+  506-verify-aput \
+  800-smali
+
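+# When the interpreter-access-checks compiler type is enabled, mark every
+# combination involving the tests listed above as known broken.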
+ifneq (,$(filter interpreter-access-checks,$(COMPILER_TYPES)))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+      interpreter-access-checks,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_INTERPRETER_ACCESS_CHECK_TESTS), $(ALL_ADDRESS_SIZES))
+endif
+
+TEST_ART_BROKEN_INTERPRETER_ACCESS_CHECK_TESTS :=
+
 # Tests that are broken with GC stress.
 # 137-cfi needs to unwind a second forked process. We're using a primitive sleep to wait till we
 # hope the second process got into the expected state. The slowness of gcstress makes this bad.
@@ -600,7 +629,8 @@
 
 # Create a rule to build and run a test following the form:
 # test-art-{1: host or target}-run-test-{2: debug ndebug}-{3: prebuild no-prebuild no-dex2oat}-
-#    {4: interpreter default optimizing jit}-{5: relocate nrelocate relocate-npatchoat}-
+#    {4: interpreter default optimizing jit interpreter-access-checks}-
+#    {5: relocate nrelocate relocate-npatchoat}-
 #    {6: trace or ntrace}-{7: gcstress gcverify cms}-{8: forcecopy checkjni jni}-
 #    {9: no-image image picimage}-{10: pictest npictest}-
 #    {11: ndebuggable debuggable}-{12: test name}{13: 32 or 64}
@@ -615,6 +645,9 @@
   else
     run_test_options += --build-with-javac-dx
   endif
+  ifeq ($(ART_TEST_WITH_STRACE),true)
+    run_test_options += --strace
+  endif
   ifeq ($(ART_TEST_RUN_TEST_ALWAYS_CLEAN),true)
     run_test_options += --always-clean
   endif
@@ -667,6 +700,9 @@
     ifeq ($(4),interpreter)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_INTERPRETER_RULES
       run_test_options += --interpreter
+    else ifeq ($(4),interpreter-access-checks)
+      test_groups += ART_RUN_TEST_$$(uc_host_or_target)_INTERPRETER_ACCESS_CHECKS_RULES
+      run_test_options += --interpreter --verify-soft-fail
     else
       ifeq ($(4),default)
         test_groups += ART_RUN_TEST_$$(uc_host_or_target)_DEFAULT_RULES
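
The same variant can be exercised on a single test without the make matrix by passing the flags above to run-test directly; a sketch, assuming a host run from the source tree:

    # run-test forwards --verify-soft-fail to etc/run-test-jar (see the next file).
    ./test/run-test --host --interpreter --verify-soft-fail 004-JniTest
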
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 842d87e..db64b77 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -42,7 +42,7 @@
 TIME_OUT_VALUE=10
 USE_GDB="n"
 USE_JVM="n"
-VERIFY="y"
+VERIFY="y" # y=yes,n=no,s=softfail
 ZYGOTE=""
 DEX_VERIFY=""
 USE_DEX2OAT_AND_PATCHOAT="y"
@@ -149,6 +149,9 @@
     elif [ "x$1" = "x--no-verify" ]; then
         VERIFY="n"
         shift
+    elif [ "x$1" = "x--verify-soft-fail" ]; then
+        VERIFY="s"
+        shift
     elif [ "x$1" = "x--no-optimize" ]; then
         OPTIMIZE="n"
         shift
@@ -201,7 +204,11 @@
     if [ "$VERIFY" = "y" ]; then
         JVM_VERIFY_ARG="-Xverify:all"
         msg "Performing verification"
-    else
+    elif [ "$VERIFY" = "s" ]; then
+        JVM_VERIFY_ARG="Xverify:all"
+        DEX_VERIFY="-Xverify:softfail"
+        msg "Forcing verification to be soft fail"
+    else # VERIFY = "n"
         DEX_VERIFY="-Xverify:none"
         JVM_VERIFY_ARG="-Xverify:none"
         msg "Skipping verification"
@@ -263,7 +270,10 @@
     INT_OPTS="-Xint"
     if [ "$VERIFY" = "y" ] ; then
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=interpret-only"
-    else
+    elif [ "$VERIFY" = "s" ]; then
+      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-at-runtime"
+      DEX_VERIFY="${DEX_VERIFY} -Xverify:softfail"
+    else # VERIFY = "n"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-none"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:none"
     fi
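
The two hunks above give VERIFY three distinct outcomes for an -Xint run; note that on the reference JVM soft fail is approximated by full verification, while on ART it becomes a deferred, soft-failing verify. A rough sketch of the effective commands (only the flags set above are shown; everything else is elided):

    # VERIFY=y: verify ahead of time, then interpret.
    dex2oat --compiler-filter=interpret-only ...; dalvikvm -Xint ...
    # VERIFY=s: defer verification and demote hard failures to soft failures.
    dex2oat --compiler-filter=verify-at-runtime ...; dalvikvm -Xint -Xverify:softfail ...
    # VERIFY=n: skip verification at both stages.
    dex2oat --compiler-filter=verify-none ...; dalvikvm -Xint -Xverify:none ...
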
diff --git a/test/run-test b/test/run-test
index f5fff09a..eabbab3 100755
--- a/test/run-test
+++ b/test/run-test
@@ -117,6 +117,7 @@
 output="output.txt"
 build_output="build-output.txt"
 cfg_output="graph.cfg"
+strace_output="strace-output.txt"
 lib="libartd.so"
 run_args="--quiet"
 build_args=""
@@ -137,6 +138,7 @@
 basic_verify="false"
 gc_verify="false"
 gc_stress="false"
+strace="false"
 always_clean="no"
 never_clean="no"
 have_dex2oat="yes"
@@ -235,6 +237,10 @@
         run_args="${run_args} --gdb"
         dev_mode="yes"
         shift
+    elif [ "x$1" = "x--strace" ]; then
+        strace="yes"
+        run_args="${run_args} --invoke-with strace --invoke-with -o --invoke-with $tmp_dir/$strace_output"
+        shift
     elif [ "x$1" = "x--zygote" ]; then
         run_args="${run_args} --zygote"
         shift
@@ -256,6 +262,10 @@
     elif [ "x$1" = "x--no-verify" ]; then
         run_args="${run_args} --no-verify"
         shift
+    elif [ "x$1" = "x--verify-soft-fail" ]; then
+        run_args="${run_args} --verify-soft-fail"
+        image_suffix="-interpreter-access-checks"
+        shift
     elif [ "x$1" = "x--no-optimize" ]; then
         run_args="${run_args} --no-optimize"
         shift
@@ -514,6 +524,9 @@
         echo "    --optimizing          Enable optimizing compiler (default)."
         echo "    --quick               Use Quick compiler (off by default)."
         echo "    --no-verify           Turn off verification (on by default)."
+        echo "    --verify-soft-fail    Force soft fail verification (off by default)."
+        echo "                          Verification is enabled if neither --no-verify"
+        echo "                          nor --verify-soft-fail is specified."
         echo "    --no-optimize         Turn off optimization (on by default)."
         echo "    --no-precise          Turn off precise GC (on by default)."
         echo "    --zygote              Spawn the process from the Zygote." \
@@ -750,6 +763,11 @@
         echo '#################### diffs'
         diff --strip-trailing-cr -u "$expected" "$output" | tail -n 2000
         echo '####################'
+        if [ "$strace" = "yes" ]; then
+            echo '#################### strace output'
+            tail -n 2000 "$tmp_dir/$strace_output"
+            echo '####################'
+        fi
         echo ' '
     fi
 
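
run-test passes each --invoke-with value as a separate argument, and the run scripts concatenate them in order into a single wrapper prefix, so with --strace (or with ART_TEST_WITH_STRACE=true via the makefile change above) the VM launch is approximately:

    # Sketch of the effective command; the real dalvikvm argument list is elided.
    strace -o $tmp_dir/strace-output.txt dalvikvm ...

On an output mismatch the tail of strace-output.txt is printed after the diff, which makes timeouts and crashes in slow configurations diagnosable after the fact.
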
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 65c3fed..992a8a6 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -144,5 +144,11 @@
   modes: [device],
   names: ["libcore.io.OsTest#test_xattr"],
   bug: 22258911
+},
+{
+  description: "fails on L builds: needs investigation",
+  result: EXEC_FAILED,
+  modes: [device],
+  names: ["org.apache.harmony.tests.java.lang.ClassTest#test_forNameLjava_lang_String"]
 }
 ]