am 921270a3: am 8ab30e0c: am 31b9d664: Hold proxy classes live in class linker.

* commit '921270a3c925a88e813f65edebe37076609a3443':
  Hold proxy classes live in class linker.
diff --git a/Android.mk b/Android.mk
index 46a7c1e..0b4b231 100644
--- a/Android.mk
+++ b/Android.mk
@@ -85,6 +85,7 @@
 include $(art_path)/runtime/Android.mk
 include $(art_path)/compiler/Android.mk
 include $(art_path)/dex2oat/Android.mk
+include $(art_path)/disassembler/Android.mk
 include $(art_path)/oatdump/Android.mk
 include $(art_path)/dalvikvm/Android.mk
 include $(art_path)/jdwpspy/Android.mk
diff --git a/build/Android.common.mk b/build/Android.common.mk
index dd0ba4d..0871884 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -105,6 +105,7 @@
 ART_C_INCLUDES := \
 	external/gtest/include \
 	external/valgrind/main/include \
+	external/valgrind/main \
 	external/zlib \
 	frameworks/compile/mclinker/include
 
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 66ff461..fc2f02b 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -74,7 +74,6 @@
 	llvm/md_builder.cc \
 	llvm/runtime_support_builder.cc \
 	llvm/runtime_support_builder_arm.cc \
-	llvm/runtime_support_builder_thumb2.cc \
 	llvm/runtime_support_builder_x86.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/arm/assembler_arm.cc \
diff --git a/compiler/dex/arena_allocator.cc b/compiler/dex/arena_allocator.cc
index 36393e7..2da8064 100644
--- a/compiler/dex/arena_allocator.cc
+++ b/compiler/dex/arena_allocator.cc
@@ -19,12 +19,15 @@
 #include "arena_allocator.h"
 #include "base/logging.h"
 #include "base/mutex.h"
+#include "thread-inl.h"
+#include <memcheck/memcheck.h>
 
 namespace art {
 
 // Memmap is a bit slower than malloc according to my measurements.
 static constexpr bool kUseMemMap = false;
 static constexpr bool kUseMemSet = true && kUseMemMap;
+static constexpr size_t kValgrindRedZoneBytes = 8;
 
 static const char* alloc_names[ArenaAllocator::kNumAllocKinds] = {
   "Misc       ",
@@ -107,6 +110,9 @@
 
 void ArenaPool::FreeArena(Arena* arena) {
   Thread* self = Thread::Current();
+  if (UNLIKELY(RUNNING_ON_VALGRIND)) {
+    VALGRIND_MAKE_MEM_UNDEFINED(arena->memory_, arena->bytes_allocated_);
+  }
   {
     MutexLock lock(self, lock_);
     arena->next_ = free_arenas_;
@@ -128,7 +134,8 @@
     end_(nullptr),
     ptr_(nullptr),
     arena_head_(nullptr),
-    num_allocations_(0) {
+    num_allocations_(0),
+    running_on_valgrind_(RUNNING_ON_VALGRIND) {
   memset(&alloc_stats_[0], 0, sizeof(alloc_stats_));
 }
 
@@ -140,6 +147,29 @@
   }
 }
 
+void* ArenaAllocator::AllocValgrind(size_t bytes, ArenaAllocKind kind) {
+  size_t rounded_bytes = (bytes + 3 + kValgrindRedZoneBytes) & ~3;
+  if (UNLIKELY(ptr_ + rounded_bytes > end_)) {
+    // Obtain a new block.
+    ObtainNewArenaForAllocation(rounded_bytes);
+    if (UNLIKELY(ptr_ == nullptr)) {
+      return nullptr;
+    }
+  }
+  if (kCountAllocations) {
+    alloc_stats_[kind] += rounded_bytes;
+    ++num_allocations_;
+  }
+  uint8_t* ret = ptr_;
+  ptr_ += rounded_bytes;
+  // Check that the memory is already zeroed out.
+  for (uint8_t* ptr = ret; ptr < ptr_; ++ptr) {
+    CHECK_EQ(*ptr, 0U);
+  }
+  VALGRIND_MAKE_MEM_NOACCESS(ret + bytes, rounded_bytes - bytes);
+  return ret;
+}
+
 ArenaAllocator::~ArenaAllocator() {
   // Reclaim all the arenas by giving them back to the thread pool.
   UpdateBytesAllocated();
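
A minimal standalone sketch of the red-zone arithmetic used by AllocValgrind above (the helper name is hypothetical, not ART code): the request is padded with the 8-byte red zone, rounded up to a 4-byte boundary, and the tail beyond the requested bytes is then marked NOACCESS so Valgrind flags overruns.

    // Minimal sketch, not ART code: the rounding performed by AllocValgrind.
    #include <cassert>
    #include <cstddef>

    static constexpr size_t kValgrindRedZoneBytes = 8;

    static size_t RoundedWithRedZone(size_t bytes) {  // hypothetical helper
      return (bytes + 3 + kValgrindRedZoneBytes) & ~static_cast<size_t>(3);
    }

    int main() {
      assert(RoundedWithRedZone(1) == 12);   // 11 tail bytes become NOACCESS.
      assert(RoundedWithRedZone(4) == 12);   // exactly the 8-byte red zone remains.
      assert(RoundedWithRedZone(13) == 24);  // 13 + 3 + 8 = 24, already 4-aligned.
      return 0;
    }
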
diff --git a/compiler/dex/arena_allocator.h b/compiler/dex/arena_allocator.h
index dda52a2..d11d67c 100644
--- a/compiler/dex/arena_allocator.h
+++ b/compiler/dex/arena_allocator.h
@@ -103,6 +103,9 @@
 
   // Returns zeroed memory.
   void* Alloc(size_t bytes, ArenaAllocKind kind) ALWAYS_INLINE {
+    if (UNLIKELY(running_on_valgrind_)) {
+      return AllocValgrind(bytes, kind);
+    }
     bytes = (bytes + 3) & ~3;
     if (UNLIKELY(ptr_ + bytes > end_)) {
       // Obtain a new block.
@@ -120,6 +123,7 @@
     return ret;
   }
 
+  void* AllocValgrind(size_t bytes, ArenaAllocKind kind);
   void ObtainNewArenaForAllocation(size_t allocation_size);
   size_t BytesAllocated() const;
   void DumpMemStats(std::ostream& os) const;
@@ -132,10 +136,9 @@
   uint8_t* end_;
   uint8_t* ptr_;
   Arena* arena_head_;
-
-  // Statistics.
   size_t num_allocations_;
-  size_t alloc_stats_[kNumAllocKinds];   // Bytes used by various allocation kinds.
+  size_t alloc_stats_[kNumAllocKinds];  // Bytes used by various allocation kinds.
+  bool running_on_valgrind_;
 
   DISALLOW_COPY_AND_ASSIGN(ArenaAllocator);
 };  // ArenaAllocator
diff --git a/compiler/dex/arena_bit_vector.cc b/compiler/dex/arena_bit_vector.cc
index 3fa9295..b921f61 100644
--- a/compiler/dex/arena_bit_vector.cc
+++ b/compiler/dex/arena_bit_vector.cc
@@ -87,12 +87,6 @@
   storage_[num >> 5] &= ~check_masks[num & 0x1f];
 }
 
-// Copy a whole vector to the other. Sizes must match.
-void ArenaBitVector::Copy(ArenaBitVector* src) {
-  DCHECK_EQ(storage_size_, src->GetStorageSize());
-  memcpy(storage_, src->GetRawStorage(), sizeof(uint32_t) * storage_size_);
-}
-
 // Intersect with another bit vector.  Sizes and expandability must be the same.
 void ArenaBitVector::Intersect(const ArenaBitVector* src) {
   DCHECK_EQ(storage_size_, src->GetStorageSize());
diff --git a/compiler/dex/arena_bit_vector.h b/compiler/dex/arena_bit_vector.h
index 8bcd628..24a7ce9 100644
--- a/compiler/dex/arena_bit_vector.h
+++ b/compiler/dex/arena_bit_vector.h
@@ -44,7 +44,7 @@
           DCHECK_EQ(bit_size_, p_bits_->GetStorageSize() * sizeof(uint32_t) * 8);
           DCHECK_EQ(bit_storage_, p_bits_->GetRawStorage());
 
-          if (bit_index_ >= bit_size_) return -1;
+          if (UNLIKELY(bit_index_ >= bit_size_)) return -1;
 
           uint32_t word_index = bit_index_ / 32;
           uint32_t word = bit_storage_[word_index];
@@ -54,7 +54,7 @@
             bit_index_ &= ~0x1f;
             do {
               word_index++;
-              if ((word_index * 32) >= bit_size_) {
+              if (UNLIKELY((word_index * 32) >= bit_size_)) {
                 bit_index_ = bit_size_;
                 return -1;
               }
@@ -95,7 +95,9 @@
     bool IsBitSet(unsigned int num);
     void ClearAllBits();
     void SetInitialBits(unsigned int num_bits);
-    void Copy(ArenaBitVector* src);
+    void Copy(ArenaBitVector* src) {
+      memcpy(storage_, src->GetRawStorage(), sizeof(uint32_t) * storage_size_);
+    }
     void Intersect(const ArenaBitVector* src2);
     void Union(const ArenaBitVector* src);
     // Are we equal to another bit vector?  Note: expandability attributes must also match.
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 97a682f..17b5bb5 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -412,6 +412,27 @@
 
 std::ostream& operator<<(std::ostream& os, const OatBitMapKind& kind);
 
+// LIR fixup kinds for Arm
+enum FixupKind {
+  kFixupNone,
+  kFixupLabel,       // For labels we just adjust the offset.
+  kFixupLoad,        // Mostly for immediates.
+  kFixupVLoad,       // FP load which *may* be pc-relative.
+  kFixupCBxZ,        // Cbz, Cbnz.
+  kFixupPushPop,     // Not really pc relative, but changes size based on args.
+  kFixupCondBranch,  // Conditional branch
+  kFixupT1Branch,    // Thumb1 Unconditional branch
+  kFixupT2Branch,    // Thumb2 Unconditional branch
+  kFixupBlx1,        // Blx1 (start of Blx1/Blx2 pair).
+  kFixupBl1,         // Bl1 (start of Bl1/Bl2 pair).
+  kFixupAdr,         // Adr.
+  kFixupMovImmLST,   // kThumb2MovImm16LST.
+  kFixupMovImmHST,   // kThumb2MovImm16HST.
+  kFixupAlign4,      // Align to 4-byte boundary.
+};
+
+std::ostream& operator<<(std::ostream& os, const FixupKind& kind);
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_COMPILER_ENUMS_H_
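
These kinds pair each ArmEncodingMap entry (see arm_lir.h later in this patch) with the relocation action needed once final offsets are known. A hypothetical dispatch sketch, with illustrative names only; the actual assembler structures are not part of this patch:

    // Illustrative only: how an assembler fixup pass might act on a FixupKind.
    void ApplyFixup(FixupKind kind /*, plus the LIR and resolved offsets */) {
      switch (kind) {
        case kFixupNone:       break;  // fully encoded already
        case kFixupLabel:      /* adjust the recorded offset */          break;
        case kFixupCondBranch: /* patch the signed branch immediate */   break;
        case kFixupAlign4:     /* insert padding to a 4-byte boundary */ break;
        default:               /* remaining kinds handled similarly */   break;
      }
    }
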
diff --git a/compiler/dex/dataflow_iterator-inl.h b/compiler/dex/dataflow_iterator-inl.h
index 06cc505..236c6f4 100644
--- a/compiler/dex/dataflow_iterator-inl.h
+++ b/compiler/dex/dataflow_iterator-inl.h
@@ -21,42 +21,63 @@
 
 namespace art {
 
-inline BasicBlock* DataflowIterator::NextBody(bool had_change) {
-  changed_ |= had_change;
+// Single forward pass over the nodes.
+inline BasicBlock* DataflowIterator::ForwardSingleNext() {
   BasicBlock* res = NULL;
-  if (reverse_) {
-    if (is_iterative_ && changed_ && (idx_ < 0)) {
-      idx_ = start_idx_;
-      changed_ = false;
-    }
-    if (idx_ >= 0) {
-      int bb_id = block_id_list_->Get(idx_--);
-      res = mir_graph_->GetBasicBlock(bb_id);
-    }
-  } else {
-    if (is_iterative_ && changed_ && (idx_ >= end_idx_)) {
-      idx_ = start_idx_;
-      changed_ = false;
-    }
-    if (idx_ < end_idx_) {
-      int bb_id = block_id_list_->Get(idx_++);
-      res = mir_graph_->GetBasicBlock(bb_id);
-    }
+  if (idx_ < end_idx_) {
+    int bb_id = block_id_list_->Get(idx_++);
+    res = mir_graph_->GetBasicBlock(bb_id);
   }
   return res;
 }
 
-// AllNodes uses the existing GrowableArray iterator, so use different NextBody().
-inline BasicBlock* AllNodesIterator::NextBody(bool had_change) {
+// Repeat full forward passes over all nodes until no change occurs during a complete pass.
+inline BasicBlock* DataflowIterator::ForwardRepeatNext(bool had_change) {
   changed_ |= had_change;
   BasicBlock* res = NULL;
+  if ((idx_ >= end_idx_) && changed_) {
+    idx_ = start_idx_;
+    changed_ = false;
+  }
+  if (idx_ < end_idx_) {
+    int bb_id = block_id_list_->Get(idx_++);
+    res = mir_graph_->GetBasicBlock(bb_id);
+  }
+  return res;
+}
+
+// Single reverse pass over the nodes.
+inline BasicBlock* DataflowIterator::ReverseSingleNext() {
+  BasicBlock* res = NULL;
+  if (idx_ >= 0) {
+    int bb_id = block_id_list_->Get(idx_--);
+    res = mir_graph_->GetBasicBlock(bb_id);
+  }
+  return res;
+}
+
+// Repeat full backward passes over all nodes until no change occurs during a complete pass.
+inline BasicBlock* DataflowIterator::ReverseRepeatNext(bool had_change) {
+  changed_ |= had_change;
+  BasicBlock* res = NULL;
+  if ((idx_ < 0) && changed_) {
+    idx_ = start_idx_;
+    changed_ = false;
+  }
+  if (idx_ >= 0) {
+    int bb_id = block_id_list_->Get(idx_--);
+    res = mir_graph_->GetBasicBlock(bb_id);
+  }
+  return res;
+}
+
+// AllNodes uses the existing GrowableArray iterator, and should be considered unordered.
+inline BasicBlock* AllNodesIterator::Next() {
+  BasicBlock* res = NULL;
   bool keep_looking = true;
   while (keep_looking) {
     res = all_nodes_iterator_->Next();
-    if (is_iterative_ && changed_ && (res == NULL)) {
-      all_nodes_iterator_->Reset();
-      changed_ = false;
-    } else if ((res == NULL) || (!res->hidden)) {
+    if ((res == NULL) || (!res->hidden)) {
       keep_looking = false;
     }
   }
diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h
index da44ffd..1dab54e 100644
--- a/compiler/dex/dataflow_iterator.h
+++ b/compiler/dex/dataflow_iterator.h
@@ -27,124 +27,130 @@
    * interesting orders.  Note that for efficiency, the visit orders have been pre-computed.
    * The order itself will not change during the iteration.  However, for some uses,
    * auxiliary data associated with the basic blocks may be changed during the iteration,
-   * necessitating another pass over the list.
-   *
-   * To support this usage, we have is_iterative_.  If false, the iteration is a one-shot
-   * pass through the pre-computed list using Next().  If true, the caller must tell the
-   * iterator whether a change has been made that necessitates another pass.  Use
-   * Next(had_change) for this.  The general idea is that the iterative_ use case means
-   * that the iterator will keep repeating the full basic block list until a complete pass
-   * is made through it with no changes.  Note that calling Next(true) does not affect
-   * the iteration order or short-curcuit the current pass - it simply tells the iterator
-   * that once it has finished walking through the block list it should reset and do another
-   * full pass through the list.
+   * necessitating another pass over the list.  If this behavior is required, use the
+   * "Repeating" variant.  For the repeating variant, the caller must tell the iterator
+   * whether a change has been made that necessitates another pass.  Note that calling Next(true)
+   * does not affect the iteration order or short-circuit the current pass - it simply tells
+   * the iterator that once it has finished walking through the block list it should reset and
+   * do another full pass through the list.
    */
   class DataflowIterator {
     public:
       virtual ~DataflowIterator() {}
 
-      // Return the next BasicBlock* to visit.
-      BasicBlock* Next() {
-        DCHECK(!is_iterative_);
-        return NextBody(false);
-      }
-
-      /*
-       * Return the next BasicBlock* to visit, and tell the iterator whether any change
-       * has occurred that requires another full pass over the block list.
-       */
-      BasicBlock* Next(bool had_change) {
-        DCHECK(is_iterative_);
-        return NextBody(had_change);
-      }
-
     protected:
-      DataflowIterator(MIRGraph* mir_graph, bool is_iterative, int start_idx, int end_idx,
-                       bool reverse)
+      DataflowIterator(MIRGraph* mir_graph, int start_idx, int end_idx)
           : mir_graph_(mir_graph),
-            is_iterative_(is_iterative),
             start_idx_(start_idx),
             end_idx_(end_idx),
-            reverse_(reverse),
             block_id_list_(NULL),
             idx_(0),
             changed_(false) {}
 
-      virtual BasicBlock* NextBody(bool had_change) ALWAYS_INLINE;
+      virtual BasicBlock* ForwardSingleNext() ALWAYS_INLINE;
+      virtual BasicBlock* ReverseSingleNext() ALWAYS_INLINE;
+      virtual BasicBlock* ForwardRepeatNext(bool had_change) ALWAYS_INLINE;
+      virtual BasicBlock* ReverseRepeatNext(bool had_change) ALWAYS_INLINE;
 
       MIRGraph* const mir_graph_;
-      const bool is_iterative_;
       const int start_idx_;
       const int end_idx_;
-      const bool reverse_;
       GrowableArray<int>* block_id_list_;
       int idx_;
       bool changed_;
   };  // DataflowIterator
 
-  class ReachableNodesIterator : public DataflowIterator {
-    public:
-      ReachableNodesIterator(MIRGraph* mir_graph, bool is_iterative)
-          : DataflowIterator(mir_graph, is_iterative, 0,
-                             mir_graph->GetNumReachableBlocks(), false) {
-        idx_ = start_idx_;
-        block_id_list_ = mir_graph->GetDfsOrder();
-      }
-  };
-
   class PreOrderDfsIterator : public DataflowIterator {
     public:
-      PreOrderDfsIterator(MIRGraph* mir_graph, bool is_iterative)
-          : DataflowIterator(mir_graph, is_iterative, 0,
-                             mir_graph->GetNumReachableBlocks(), false) {
+      explicit PreOrderDfsIterator(MIRGraph* mir_graph)
+          : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) {
         idx_ = start_idx_;
         block_id_list_ = mir_graph->GetDfsOrder();
       }
+
+      BasicBlock* Next() {
+        return ForwardSingleNext();
+      }
   };
 
-  class PostOrderDfsIterator : public DataflowIterator {
+  class RepeatingPreOrderDfsIterator : public DataflowIterator {
     public:
-      PostOrderDfsIterator(MIRGraph* mir_graph, bool is_iterative)
-          : DataflowIterator(mir_graph, is_iterative, 0,
-                             mir_graph->GetNumReachableBlocks(), false) {
+      explicit RepeatingPreOrderDfsIterator(MIRGraph* mir_graph)
+          : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) {
+        idx_ = start_idx_;
+        block_id_list_ = mir_graph->GetDfsOrder();
+      }
+
+      BasicBlock* Next(bool had_change) {
+        return ForwardRepeatNext(had_change);
+      }
+  };
+
+  class RepeatingPostOrderDfsIterator : public DataflowIterator {
+    public:
+      explicit RepeatingPostOrderDfsIterator(MIRGraph* mir_graph)
+          : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) {
         idx_ = start_idx_;
         block_id_list_ = mir_graph->GetDfsPostOrder();
       }
+
+      BasicBlock* Next(bool had_change) {
+        return ForwardRepeatNext(had_change);
+      }
   };
 
   class ReversePostOrderDfsIterator : public DataflowIterator {
     public:
-      ReversePostOrderDfsIterator(MIRGraph* mir_graph, bool is_iterative)
-          : DataflowIterator(mir_graph, is_iterative,
-                             mir_graph->GetNumReachableBlocks() -1, 0, true) {
+      explicit ReversePostOrderDfsIterator(MIRGraph* mir_graph)
+          : DataflowIterator(mir_graph, mir_graph->GetNumReachableBlocks() - 1, 0) {
         idx_ = start_idx_;
         block_id_list_ = mir_graph->GetDfsPostOrder();
       }
+
+      BasicBlock* Next() {
+        return ReverseSingleNext();
+      }
+  };
+
+  class RepeatingReversePostOrderDfsIterator : public DataflowIterator {
+    public:
+      explicit RepeatingReversePostOrderDfsIterator(MIRGraph* mir_graph)
+          : DataflowIterator(mir_graph, mir_graph->GetNumReachableBlocks() - 1, 0) {
+        idx_ = start_idx_;
+        block_id_list_ = mir_graph->GetDfsPostOrder();
+      }
+
+      BasicBlock* Next(bool had_change) {
+        return ReverseRepeatNext(had_change);
+      }
   };
 
   class PostOrderDOMIterator : public DataflowIterator {
     public:
-      PostOrderDOMIterator(MIRGraph* mir_graph, bool is_iterative)
-          : DataflowIterator(mir_graph, is_iterative, 0,
-                             mir_graph->GetNumReachableBlocks(), false) {
+      explicit PostOrderDOMIterator(MIRGraph* mir_graph)
+          : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) {
         idx_ = start_idx_;
         block_id_list_ = mir_graph->GetDomPostOrder();
       }
+
+      BasicBlock* Next() {
+        return ForwardSingleNext();
+      }
   };
 
   class AllNodesIterator : public DataflowIterator {
     public:
-      AllNodesIterator(MIRGraph* mir_graph, bool is_iterative)
-          : DataflowIterator(mir_graph, is_iterative, 0, 0, false) {
-        all_nodes_iterator_ =
-            new (mir_graph->GetArena()) GrowableArray<BasicBlock*>::Iterator(mir_graph->GetBlockList());
+      explicit AllNodesIterator(MIRGraph* mir_graph)
+          : DataflowIterator(mir_graph, 0, 0) {
+        all_nodes_iterator_ =
+            new (mir_graph->GetArena()) GrowableArray<BasicBlock*>::Iterator(mir_graph->GetBlockList());
       }
 
       void Reset() {
         all_nodes_iterator_->Reset();
       }
 
-      BasicBlock* NextBody(bool had_change) ALWAYS_INLINE;
+      BasicBlock* Next() ALWAYS_INLINE;
 
     private:
       GrowableArray<BasicBlock*>::Iterator* all_nodes_iterator_;
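
The call pattern for the repeating variants, matching the NullCheckElimination change in mir_optimization.cc later in this patch (DoPass is a stand-in for a real pass such as EliminateNullChecks):

    // Keep iterating until one full pass completes with no reported change.
    RepeatingPreOrderDfsIterator iter(mir_graph);
    bool change = false;
    for (BasicBlock* bb = iter.Next(change); bb != NULL; bb = iter.Next(change)) {
      change = DoPass(bb);  // stand-in; must report whether anything changed
    }
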
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 63d8aa0..abafbc5 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -24,6 +24,7 @@
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache.h"
+#include "thread-inl.h"
 
 namespace art {
 namespace optimizer {
@@ -216,8 +217,8 @@
   uint32_t field_idx = inst->VRegC_22c();
   int field_offset;
   bool is_volatile;
-  bool fast_path = driver_.ComputeInstanceFieldInfo(field_idx, &unit_, field_offset,
-                                                    is_volatile, is_put);
+  bool fast_path = driver_.ComputeInstanceFieldInfo(field_idx, &unit_, is_put,
+                                                    &field_offset, &is_volatile);
   if (fast_path && !is_volatile && IsUint(16, field_offset)) {
     VLOG(compiler) << "Quickening " << Instruction::Name(inst->Opcode())
                    << " to " << Instruction::Name(new_opcode)
@@ -246,11 +247,13 @@
   int vtable_idx;
   uintptr_t direct_code;
   uintptr_t direct_method;
-  bool fast_path = driver_.ComputeInvokeInfo(&unit_, dex_pc, invoke_type,
-                                             target_method, vtable_idx,
-                                             direct_code, direct_method,
-                                             false);
   // TODO: support devirtualization.
+  const bool kEnableDevirtualization = false;
+  bool fast_path = driver_.ComputeInvokeInfo(&unit_, dex_pc,
+                                             false, kEnableDevirtualization,
+                                             &invoke_type,
+                                             &target_method, &vtable_idx,
+                                             &direct_code, &direct_method);
   if (fast_path && original_invoke_type == invoke_type) {
     if (vtable_idx >= 0 && IsUint(16, vtable_idx)) {
       VLOG(compiler) << "Quickening " << Instruction::Name(inst->Opcode())
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index fefcab9..2952570 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -117,6 +117,11 @@
 #endif
 ) {
   VLOG(compiler) << "Compiling " << PrettyMethod(method_idx, dex_file) << "...";
+  if (code_item->insns_size_in_code_units_ >= 0x10000) {
+    LOG(INFO) << "Method size exceeds compiler limits: " << code_item->insns_size_in_code_units_
+              << " in " << PrettyMethod(method_idx, dex_file);
+    return NULL;
+  }
 
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   CompilationUnit cu(&compiler.GetArenaPool());
@@ -151,7 +156,7 @@
    */
 
   if (compiler_backend == kPortable) {
-    // Fused long branches not currently usseful in bitcode.
+    // Fused long branches not currently useful in bitcode.
     cu.disable_opt |= (1 << kBranchFusing);
   }
 
diff --git a/compiler/dex/growable_array.h b/compiler/dex/growable_array.h
index 8e2abfb..639120a 100644
--- a/compiler/dex/growable_array.h
+++ b/compiler/dex/growable_array.h
@@ -131,6 +131,11 @@
       elem_list_[index]++;
     }
 
+    /*
+     * Remove an existing element from the list.  If there is more than one copy
+     * of the element, only the first one encountered will be deleted.
+     */
+    // TODO: consider renaming this.
     void Delete(T element) {
       bool found = false;
       for (size_t i = 0; i < num_used_ - 1; i++) {
@@ -150,6 +155,11 @@
 
     size_t Size() const { return num_used_; }
 
+    void SetSize(size_t new_size) {
+      Resize(new_size);
+      num_used_ = new_size;
+    }
+
     T* GetRawStorage() const { return elem_list_; }
 
     static void* operator new(size_t size, ArenaAllocator* arena) {
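
SetSize lets callers pre-extend an array so it can be indexed immediately, which is how the new dex_pc_to_block_map_ in mir_graph.cc below uses it. A hedged sketch, assuming Resize grows the backing storage and new slots read as zero:

    // Sketch of the intended SetSize usage (arena and sizes are illustrative).
    GrowableArray<uint16_t> map(arena, 0, kGrowableArrayMisc);
    map.SetSize(code_units);     // storage and num_used_ both become code_units
    map.Put(dex_pc, block_id);   // in range now that dex_pc < code_units
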
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index d7a4136..8597172 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -1032,6 +1032,14 @@
    */
   if (GetNumDalvikInsns() > Runtime::Current()->GetHugeMethodThreshold()) {
     skip_compilation = true;
+    // If we've got a huge number of basic blocks, don't bother with further analysis.
+    if (static_cast<size_t>(num_blocks_) > (Runtime::Current()->GetHugeMethodThreshold() / 2)) {
+      return true;
+    }
+  } else if (GetNumDalvikInsns() > Runtime::Current()->GetLargeMethodThreshold() &&
+             (GetBranchCount() == 0)) {
+    // If it's large and contains no branches, it's likely to be machine-generated initialization.
+    return true;
   } else if (compiler_filter == Runtime::kSpeed) {
     // If not huge, compile.
     return false;
@@ -1061,7 +1069,7 @@
   memset(&stats, 0, sizeof(stats));
 
   ClearAllVisitedFlags();
-  AllNodesIterator iter(this, false /* not iterative */);
+  AllNodesIterator iter(this);
   for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
     AnalyzeBlock(bb, &stats);
   }
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index 3a73717..3d29908 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -1221,10 +1221,10 @@
   uint32_t current_offset = static_cast<uint32_t>(current_offset_);
   bool fast_path =
       cu_->compiler_driver->ComputeInvokeInfo(&m_unit, current_offset,
-                                              type, target_method,
-                                              vtable_idx,
-                                              direct_code, direct_method,
-                                              false) &&
+                                              false, true,
+                                              &type, &target_method,
+                                              &vtable_idx,
+                                              &direct_code, &direct_method) &&
                                               !(cu_->enable_debug & (1 << kDebugSlowInvokePath));
   return (((type == kDirect) || (type == kStatic)) &&
           fast_path && ((direct_code == 0) || (direct_method == 0)));
@@ -1287,7 +1287,7 @@
   if (cu_->disable_opt & (1 << kPromoteRegs)) {
     return;
   }
-  AllNodesIterator iter(this, false /* not iterative */);
+  AllNodesIterator iter(this);
   for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
     CountUses(bb);
   }
@@ -1331,7 +1331,7 @@
 
 void MIRGraph::VerifyDataflow() {
     /* Verify if all blocks are connected as claimed */
-  AllNodesIterator iter(this, false /* not iterative */);
+  AllNodesIterator iter(this);
   for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
     VerifyPredInfo(bb);
   }
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index a12bf39..fb306de 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -96,9 +96,9 @@
       try_block_addr_(NULL),
       entry_block_(NULL),
       exit_block_(NULL),
-      cur_block_(NULL),
       num_blocks_(0),
       current_code_item_(NULL),
+      dex_pc_to_block_map_(arena, 0, kGrowableArrayMisc),
       current_method_(kInvalidEntry),
       current_offset_(kInvalidEntry),
       def_count_(0),
@@ -108,7 +108,9 @@
       attributes_(METHOD_IS_LEAF),  // Start with leaf assumption, change on encountering invoke.
       checkstats_(NULL),
       special_case_(kNoHandler),
-      arena_(arena) {
+      arena_(arena),
+      backward_branches_(0),
+      forward_branches_(0) {
   try_block_addr_ = new (arena_) ArenaBitVector(arena_, 0, true /* expandable */);
 }
 
@@ -150,7 +152,7 @@
   orig_block->terminated_by_return = false;
 
   /* Add it to the quick lookup cache */
-  block_map_.Put(bottom_block->start_offset, bottom_block);
+  dex_pc_to_block_map_.Put(bottom_block->start_offset, bottom_block->id);
 
   /* Handle the taken path */
   bottom_block->taken = orig_block->taken;
@@ -195,6 +197,23 @@
     DCHECK_EQ(*immed_pred_block_p, orig_block);
     *immed_pred_block_p = bottom_block;
   }
+
+  // Associate dex instructions in the bottom block with the new container.
+  MIR* p = bottom_block->first_mir_insn;
+  while (p != NULL) {
+    int opcode = p->dalvikInsn.opcode;
+    /*
+     * Some messiness here to ensure that we record only real opcodes and, for a
+     * potentially throwing instruction that has been split into CHECK and work
+     * portions, only its first half.  The 2nd half of a split operation will have
+     * a non-null throw_insn pointer that refers to the 1st half.
+     */
+    if ((opcode == kMirOpCheck) || (!IsPseudoMirOp(opcode) && (p->meta.throw_insn == NULL))) {
+      dex_pc_to_block_map_.Put(p->offset, bottom_block->id);
+    }
+    p = (p == bottom_block->last_mir_insn) ? NULL : p->next;
+  }
+
   return bottom_block;
 }
 
@@ -208,39 +227,37 @@
  */
 BasicBlock* MIRGraph::FindBlock(unsigned int code_offset, bool split, bool create,
                                 BasicBlock** immed_pred_block_p) {
-  BasicBlock* bb;
-  unsigned int i;
-  SafeMap<unsigned int, BasicBlock*>::iterator it;
-
-  it = block_map_.find(code_offset);
-  if (it != block_map_.end()) {
-    return it->second;
-  } else if (!create) {
+  if (code_offset >= cu_->code_item->insns_size_in_code_units_) {
     return NULL;
   }
 
-  if (split) {
-    for (i = 0; i < block_list_.Size(); i++) {
-      bb = block_list_.Get(i);
-      if (bb->block_type != kDalvikByteCode) continue;
-      /* Check if a branch jumps into the middle of an existing block */
-      if ((code_offset > bb->start_offset) && (bb->last_mir_insn != NULL) &&
-          (code_offset <= bb->last_mir_insn->offset)) {
-        BasicBlock *new_bb = SplitBlock(code_offset, bb, bb == *immed_pred_block_p ?
-                                       immed_pred_block_p : NULL);
-        return new_bb;
-      }
-    }
+  int block_id = dex_pc_to_block_map_.Get(code_offset);
+  BasicBlock* bb = (block_id == 0) ? NULL : block_list_.Get(block_id);
+
+  if ((bb != NULL) && (bb->start_offset == code_offset)) {
+    // Does this containing block start with the desired instruction?
+    return bb;
   }
 
-  /* Create a new one */
+  // No direct hit.
+  if (!create) {
+    return NULL;
+  }
+
+  if (bb != NULL) {
+    // The target exists somewhere in an existing block.
+    return SplitBlock(code_offset, bb, bb == *immed_pred_block_p ?  immed_pred_block_p : NULL);
+  }
+
+  // Create a new block.
   bb = NewMemBB(kDalvikByteCode, num_blocks_++);
   block_list_.Insert(bb);
   bb->start_offset = code_offset;
-  block_map_.Put(bb->start_offset, bb);
+  dex_pc_to_block_map_.Put(bb->start_offset, bb->id);
   return bb;
 }
 
+
 /* Identify code range in try blocks and set up the empty catch blocks */
 void MIRGraph::ProcessTryCatchBlocks() {
   int tries_size = current_code_item_->tries_size_;
@@ -306,6 +323,7 @@
     default:
       LOG(FATAL) << "Unexpected opcode(" << insn->dalvikInsn.opcode << ") with kBranch set";
   }
+  CountBranch(target);
   BasicBlock *taken_block = FindBlock(target, /* split */ true, /* create */ true,
                                       /* immed_pred_block_p */ &cur_block);
   cur_block->taken = taken_block;
@@ -484,6 +502,9 @@
    * pseudo exception edge MIR.  Note also that this new block is
    * not automatically terminated after the work portion, and may
    * contain following instructions.
+   *
+   * Note also that the dex_pc_to_block_map_ entry for the potentially
+   * throwing instruction will refer to the original basic block.
    */
   BasicBlock *new_block = NewMemBB(kDalvikByteCode, num_blocks_++);
   block_list_.Insert(new_block);
@@ -517,7 +538,10 @@
       current_code_item_->insns_ + current_code_item_->insns_size_in_code_units_;
 
   // TODO: need to rework expansion of block list & try_block_addr when inlining activated.
+  // TUNING: use better estimate of basic blocks for following resize.
   block_list_.Resize(block_list_.Size() + current_code_item_->insns_size_in_code_units_);
+  dex_pc_to_block_map_.SetSize(dex_pc_to_block_map_.Size() + current_code_item_->insns_size_in_code_units_);
+
   // TODO: replace with explicit resize routine.  Using automatic extension side effect for now.
   try_block_addr_->SetBit(current_code_item_->insns_size_in_code_units_);
   try_block_addr_->ClearBit(current_code_item_->insns_size_in_code_units_);
@@ -557,10 +581,7 @@
   DCHECK_EQ(current_offset_, 0);
   cur_block->start_offset = current_offset_;
   block_list_.Insert(cur_block);
-  /* Add first block to the fast lookup cache */
-// FIXME: block map needs association with offset/method pair rather than just offset
-  block_map_.Put(cur_block->start_offset, cur_block);
-// FIXME: this needs to insert at the insert point rather than entry block.
+  // FIXME: this needs to insert at the insert point rather than entry block.
   entry_block_->fall_through = cur_block;
   cur_block->predecessors->Insert(entry_block_);
 
@@ -586,7 +607,6 @@
       opcode_count_[static_cast<int>(opcode)]++;
     }
 
-
     /* Possible simple method? */
     if (live_pattern) {
       live_pattern = false;
@@ -637,6 +657,9 @@
       AppendMIR(cur_block, insn);
     }
 
+    // Associate the starting dex_pc for this opcode with its containing basic block.
+    dex_pc_to_block_map_.Put(insn->offset, cur_block->id);
+
     code_ptr += width;
 
     if (flags & Instruction::kBranch) {
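
Net effect of the FindBlock rewrite above: the per-offset SafeMap and the linear scan over block_list_ are replaced by a dense uint16_t array indexed by dex pc, so the common lookup is a constant-time load. Block id 0 doubles as "no block recorded", and 16-bit ids are presumably safe because frontend.cc (earlier in this patch) now rejects methods of 0x10000 code units or more:

    // The new fast path, as introduced above (0 means "no block recorded").
    int block_id = dex_pc_to_block_map_.Get(code_offset);
    BasicBlock* bb = (block_id == 0) ? NULL : block_list_.Get(block_id);
    if ((bb != NULL) && (bb->start_offset == code_offset)) {
      return bb;  // direct hit: a block starts exactly at this dex pc
    }
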
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 6f8bd85..5d01489 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -569,6 +569,26 @@
     return IsBackedge(branch_bb, branch_bb->taken) || IsBackedge(branch_bb, branch_bb->fall_through);
   }
 
+  void CountBranch(int target_offset) {
+    if (target_offset <= current_offset_) {
+      backward_branches_++;
+    } else {
+      forward_branches_++;
+    }
+  }
+
+  int GetBranchCount() {
+    return backward_branches_ + forward_branches_;
+  }
+
+  bool IsPseudoMirOp(Instruction::Code opcode) {
+    return static_cast<int>(opcode) >= static_cast<int>(kMirOpFirst);
+  }
+
+  bool IsPseudoMirOp(int opcode) {
+    return opcode >= static_cast<int>(kMirOpFirst);
+  }
+
   void BasicBlockCombine();
   void CodeLayout();
   void DumpCheckStats();
@@ -580,11 +600,34 @@
   void SSATransformation();
   void CheckForDominanceFrontier(BasicBlock* dom_bb, const BasicBlock* succ_bb);
   void NullCheckElimination();
+  /*
+   * Type inference handling helpers.  Because Dalvik's bytecode is not fully typed,
+   * we have to do some work to figure out the sreg type.  For some operations it is
+   * clear based on the opcode (e.g. ADD_FLOAT v0, v1, v2), but for others (MOVE), we
+   * may never know the "real" type.
+   *
+   * We perform the type inference operation using an iterative walk over
+   * the graph, propagating types "defined" by typed opcodes to uses and defs in
+   * non-typed opcodes (such as MOVE).  The Setxx(index) helpers are used to set defined
+   * types on typed opcodes (such as ADD_INT).  The Setxx(index, is_xx) form is used to
+   * propagate types through non-typed opcodes such as PHI and MOVE.  The is_xx flag
+   * tells whether our guess of the type is based on a previously typed definition.
+   * If so, the defined type takes precedence.  Note that it's possible to have the same sreg
+   * show multiple defined types because dx treats constants as untyped bit patterns.
+   * The return value of the Setxx() helpers says whether or not the Setxx() action changed
+   * the current guess, and is used to know when to terminate the iterative walk.
+   */
   bool SetFp(int index, bool is_fp);
+  bool SetFp(int index);
   bool SetCore(int index, bool is_core);
+  bool SetCore(int index);
   bool SetRef(int index, bool is_ref);
+  bool SetRef(int index);
   bool SetWide(int index, bool is_wide);
+  bool SetWide(int index);
   bool SetHigh(int index, bool is_high);
+  bool SetHigh(int index);
+
   void AppendMIR(BasicBlock* bb, MIR* mir);
   void PrependMIR(BasicBlock* bb, MIR* mir);
   void InsertMIRAfter(BasicBlock* bb, MIR* current_mir, MIR* new_mir);
@@ -702,15 +745,14 @@
   ArenaBitVector* try_block_addr_;
   BasicBlock* entry_block_;
   BasicBlock* exit_block_;
-  BasicBlock* cur_block_;
   int num_blocks_;
   const DexFile::CodeItem* current_code_item_;
-  SafeMap<unsigned int, BasicBlock*> block_map_;  // FindBlock lookup cache.
+  GrowableArray<uint16_t> dex_pc_to_block_map_;  // FindBlock lookup cache.
   std::vector<DexCompilationUnit*> m_units_;     // List of methods included in this graph
   typedef std::pair<int, int> MIRLocation;       // Insert point, (m_unit_ index, offset)
   std::vector<MIRLocation> method_stack_;        // Include stack
   int current_method_;
-  int current_offset_;
+  int current_offset_;                           // Dex offset in code units
   int def_count_;                                // Used to estimate size of ssa name storage.
   int* opcode_count_;                            // Dex opcode coverage stats.
   int num_ssa_regs_;                             // Number of names following SSA transformation.
@@ -720,6 +762,8 @@
   Checkstats* checkstats_;
   SpecialCaseHandler special_case_;
   ArenaAllocator* arena_;
+  int backward_branches_;
+  int forward_branches_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index b7611f8..05e428e 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -96,7 +96,7 @@
   is_constant_v_ = new (arena_) ArenaBitVector(arena_, GetNumSSARegs(), false);
   constant_values_ = static_cast<int*>(arena_->Alloc(sizeof(int) * GetNumSSARegs(),
                                                      ArenaAllocator::kAllocDFInfo));
-  AllNodesIterator iter(this, false /* not iterative */);
+  AllNodesIterator iter(this);
   for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
     DoConstantPropogation(bb);
   }
@@ -762,11 +762,11 @@
 void MIRGraph::NullCheckElimination() {
   if (!(cu_->disable_opt & (1 << kNullCheckElimination))) {
     DCHECK(temp_ssa_register_v_ != NULL);
-    AllNodesIterator iter(this, false /* not iterative */);
+    AllNodesIterator iter(this);
     for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
       NullCheckEliminationInit(bb);
     }
-    PreOrderDfsIterator iter2(this, true /* iterative */);
+    RepeatingPreOrderDfsIterator iter2(this);
     bool change = false;
     for (BasicBlock* bb = iter2.Next(change); bb != NULL; bb = iter2.Next(change)) {
       change = EliminateNullChecks(bb);
@@ -778,7 +778,7 @@
 }
 
 void MIRGraph::BasicBlockCombine() {
-  PreOrderDfsIterator iter(this, false /* not iterative */);
+  PreOrderDfsIterator iter(this);
   for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
     CombineBlocks(bb);
   }
@@ -791,7 +791,7 @@
   if (cu_->enable_debug & (1 << kDebugVerifyDataflow)) {
     VerifyDataflow();
   }
-  AllNodesIterator iter(this, false /* not iterative */);
+  AllNodesIterator iter(this);
   for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
     LayoutBlocks(bb);
   }
@@ -804,7 +804,7 @@
   Checkstats* stats =
       static_cast<Checkstats*>(arena_->Alloc(sizeof(Checkstats), ArenaAllocator::kAllocDFInfo));
   checkstats_ = stats;
-  AllNodesIterator iter(this, false /* not iterative */);
+  AllNodesIterator iter(this);
   for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
     CountChecks(bb);
   }
@@ -858,7 +858,7 @@
   if (!(cu_->disable_opt & (1 << kBBOpt))) {
     DCHECK_EQ(cu_->num_compiler_temps, 0);
     ClearAllVisitedFlags();
-    PreOrderDfsIterator iter2(this, false /* not iterative */);
+    PreOrderDfsIterator iter2(this);
     for (BasicBlock* bb = iter2.Next(); bb != NULL; bb = iter2.Next()) {
       BuildExtendedBBList(bb);
     }
diff --git a/compiler/dex/portable/mir_to_gbc.cc b/compiler/dex/portable/mir_to_gbc.cc
index 90cec75..df10f7e 100644
--- a/compiler/dex/portable/mir_to_gbc.cc
+++ b/compiler/dex/portable/mir_to_gbc.cc
@@ -30,10 +30,10 @@
 #include "dex/compiler_internals.h"
 #include "dex/dataflow_iterator-inl.h"
 #include "dex/frontend.h"
-#include "mir_to_gbc.h"
-
 #include "llvm/llvm_compilation_unit.h"
 #include "llvm/utils_llvm.h"
+#include "mir_to_gbc.h"
+#include "thread-inl.h"
 
 const char* kLabelFormat = "%c0x%x_%d";
 const char kInvalidBlock = 0xff;
@@ -1877,7 +1877,7 @@
   CreateFunction();
 
   // Create an LLVM basic block for each MIR block in dfs preorder
-  PreOrderDfsIterator iter(mir_graph_, false /* not iterative */);
+  PreOrderDfsIterator iter(mir_graph_);
   for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
     CreateLLVMBasicBlock(bb);
   }
@@ -1909,7 +1909,7 @@
     }
   }
 
-  PreOrderDfsIterator iter2(mir_graph_, false /* not iterative */);
+  PreOrderDfsIterator iter2(mir_graph_);
   for (BasicBlock* bb = iter2.Next(); bb != NULL; bb = iter2.Next()) {
     BlockBitcodeConversion(bb);
   }
@@ -1972,7 +1972,7 @@
 
     ::llvm::OwningPtr< ::llvm::tool_output_file> out_file(
         new ::llvm::tool_output_file(fname.c_str(), errmsg,
-                                   ::llvm::sys::fs::F_Binary));
+                                   ::llvm::raw_fd_ostream::F_Binary));
 
     if (!errmsg.empty()) {
       LOG(ERROR) << "Failed to create bitcode output file: " << errmsg;
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index 2f54190..d184673 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -462,7 +462,7 @@
 
 // Instruction assembly field_loc kind.
 enum ArmEncodingKind {
-  kFmtUnused,
+  kFmtUnused,    // Unused field; also marks end of formats.
   kFmtBitBlt,    // Bit string using end/start.
   kFmtDfp,       // Double FP reg.
   kFmtSfp,       // Single FP reg.
@@ -477,6 +477,7 @@
   kFmtBrOffset,  // Signed extended [26,11,13,21-16,10-0]:0.
   kFmtFPImm,     // Encoded floating point immediate.
   kFmtOff24,     // 24-bit Thumb2 unconditional branch encoding.
+  kFmtSkip,      // Unused field, but continue to next.
 };
 
 // Struct used to define the snippet positions for each Thumb opcode.
@@ -492,6 +493,7 @@
   const char* name;
   const char* fmt;
   int size;   // Note: size is in bytes.
+  FixupKind fixup;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 0649c9f..3c646c4 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -37,9 +37,9 @@
  * fmt: for pretty-printing
  */
 #define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \
-                     k3, k3s, k3e, flags, name, fmt, size) \
+                     k3, k3s, k3e, flags, name, fmt, size, fixup) \
         {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \
-                    {k3, k3s, k3e}}, opcode, flags, name, fmt, size}
+                    {k3, k3s, k3e}}, opcode, flags, name, fmt, size, fixup}
 
 /* Instruction dump string format keys: !pf, where "!" is the start
  * of the key, "p" is which numeric operand to use and "f" is the
@@ -79,916 +79,938 @@
 const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = {
     ENCODING_MAP(kArm16BitData,    0x0000,
                  kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2),
+                 kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2, kFixupNone),
     ENCODING_MAP(kThumbAdcRR,        0x4140,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES | USES_CCODES,
-                 "adcs", "!0C, !1C", 2),
+                 "adcs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbAddRRI3,      0x1c00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "adds", "!0C, !1C, #!2d", 2),
+                 "adds", "!0C, !1C, #!2d", 2, kFixupNone),
     ENCODING_MAP(kThumbAddRI8,       0x3000,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
-                 "adds", "!0C, !0C, #!1d", 2),
+                 "adds", "!0C, !0C, #!1d", 2, kFixupNone),
     ENCODING_MAP(kThumbAddRRR,       0x1800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "adds", "!0C, !1C, !2C", 2),
+                 "adds", "!0C, !1C, !2C", 2, kFixupNone),
     ENCODING_MAP(kThumbAddRRLH,     0x4440,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
-                 "add", "!0C, !1C", 2),
+                 "add", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbAddRRHL,     0x4480,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
-                 "add", "!0C, !1C", 2),
+                 "add", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbAddRRHH,     0x44c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
-                 "add", "!0C, !1C", 2),
+                 "add", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbAddPcRel,    0xa000,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | IS_BRANCH | NEEDS_FIXUP,
-                 "add", "!0C, pc, #!1E", 2),
+                 "add", "!0C, pc, #!1E", 2, kFixupLoad),
     ENCODING_MAP(kThumbAddSpRel,    0xa800,
-                 kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0,
+                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF_SP | REG_USE_SP,
-                 "add", "!0C, sp, #!2E", 2),
+                 "add", "!0C, sp, #!2E", 2, kFixupNone),
     ENCODING_MAP(kThumbAddSpI7,      0xb000,
                  kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP,
-                 "add", "sp, #!0d*4", 2),
+                 "add", "sp, #!0d*4", 2, kFixupNone),
     ENCODING_MAP(kThumbAndRR,        0x4000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "ands", "!0C, !1C", 2),
+                 "ands", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbAsrRRI5,      0x1000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "asrs", "!0C, !1C, #!2d", 2),
+                 "asrs", "!0C, !1C, #!2d", 2, kFixupNone),
     ENCODING_MAP(kThumbAsrRR,        0x4100,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "asrs", "!0C, !1C", 2),
+                 "asrs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbBCond,        0xd000,
                  kFmtBitBlt, 7, 0, kFmtBitBlt, 11, 8, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES |
-                 NEEDS_FIXUP, "b!1c", "!0t", 2),
+                 NEEDS_FIXUP, "b!1c", "!0t", 2, kFixupCondBranch),
     ENCODING_MAP(kThumbBUncond,      0xe000,
                  kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP,
-                 "b", "!0t", 2),
+                 "b", "!0t", 2, kFixupT1Branch),
     ENCODING_MAP(kThumbBicRR,        0x4380,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "bics", "!0C, !1C", 2),
+                 "bics", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbBkpt,          0xbe00,
                  kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
-                 "bkpt", "!0d", 2),
+                 "bkpt", "!0d", 2, kFixupNone),
     ENCODING_MAP(kThumbBlx1,         0xf000,
                  kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR |
-                 NEEDS_FIXUP, "blx_1", "!0u", 2),
+                 NEEDS_FIXUP, "blx_1", "!0u", 2, kFixupBlx1),
     ENCODING_MAP(kThumbBlx2,         0xe800,
                  kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR |
-                 NEEDS_FIXUP, "blx_2", "!0v", 2),
+                 NEEDS_FIXUP, "blx_2", "!0v", 2, kFixupLabel),
     ENCODING_MAP(kThumbBl1,          0xf000,
                  kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR,
-                 "bl_1", "!0u", 2),
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
+                 "bl_1", "!0u", 2, kFixupBl1),
     ENCODING_MAP(kThumbBl2,          0xf800,
                  kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR,
-                 "bl_2", "!0v", 2),
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
+                 "bl_2", "!0v", 2, kFixupLabel),
     ENCODING_MAP(kThumbBlxR,         0x4780,
                  kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR,
-                 "blx", "!0C", 2),
+                 "blx", "!0C", 2, kFixupNone),
     ENCODING_MAP(kThumbBx,            0x4700,
                  kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
-                 "bx", "!0C", 2),
+                 "bx", "!0C", 2, kFixupNone),
     ENCODING_MAP(kThumbCmnRR,        0x42c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmn", "!0C, !1C", 2),
+                 "cmn", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbCmpRI8,       0x2800,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES,
-                 "cmp", "!0C, #!1d", 2),
+                 "cmp", "!0C, #!1d", 2, kFixupNone),
     ENCODING_MAP(kThumbCmpRR,        0x4280,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2),
+                 "cmp", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbCmpLH,        0x4540,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2),
+                 "cmp", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbCmpHL,        0x4580,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2),
+                 "cmp", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbCmpHH,        0x45c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2),
+                 "cmp", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbEorRR,        0x4040,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "eors", "!0C, !1C", 2),
+                 "eors", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbLdmia,         0xc800,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
-                 "ldmia", "!0C!!, <!1R>", 2),
+                 "ldmia", "!0C!!, <!1R>", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrRRI5,      0x6800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldr", "!0C, [!1C, #!2E]", 2),
+                 "ldr", "!0C, [!1C, #!2E]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrRRR,       0x5800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldr", "!0C, [!1C, !2C]", 2),
+                 "ldr", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrPcRel,    0x4800,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC
-                 | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2),
+                 | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2, kFixupLoad),
     ENCODING_MAP(kThumbLdrSpRel,    0x9800,
-                 kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0,
+                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP
-                 | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2),
+                 | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrbRRI5,     0x7800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrb", "!0C, [!1C, #2d]", 2),
+                 "ldrb", "!0C, [!1C, #2d]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrbRRR,      0x5c00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrb", "!0C, [!1C, !2C]", 2),
+                 "ldrb", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrhRRI5,     0x8800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrh", "!0C, [!1C, #!2F]", 2),
+                 "ldrh", "!0C, [!1C, #!2F]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrhRRR,      0x5a00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrh", "!0C, [!1C, !2C]", 2),
+                 "ldrh", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrsbRRR,     0x5600,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsb", "!0C, [!1C, !2C]", 2),
+                 "ldrsb", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrshRRR,     0x5e00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsh", "!0C, [!1C, !2C]", 2),
+                 "ldrsh", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbLslRRI5,      0x0000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "lsls", "!0C, !1C, #!2d", 2),
+                 "lsls", "!0C, !1C, #!2d", 2, kFixupNone),
     ENCODING_MAP(kThumbLslRR,        0x4080,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "lsls", "!0C, !1C", 2),
+                 "lsls", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbLsrRRI5,      0x0800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "lsrs", "!0C, !1C, #!2d", 2),
+                 "lsrs", "!0C, !1C, #!2d", 2, kFixupNone),
     ENCODING_MAP(kThumbLsrRR,        0x40c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "lsrs", "!0C, !1C", 2),
+                 "lsrs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMovImm,       0x2000,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0 | SETS_CCODES,
-                 "movs", "!0C, #!1d", 2),
+                 "movs", "!0C, #!1d", 2, kFixupNone),
     ENCODING_MAP(kThumbMovRR,        0x1c00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "movs", "!0C, !1C", 2),
+                 "movs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMovRR_H2H,    0x46c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mov", "!0C, !1C", 2),
+                 "mov", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMovRR_H2L,    0x4640,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mov", "!0C, !1C", 2),
+                 "mov", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMovRR_L2H,    0x4680,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mov", "!0C, !1C", 2),
+                 "mov", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMul,           0x4340,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "muls", "!0C, !1C", 2),
+                 "muls", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMvn,           0x43c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "mvns", "!0C, !1C", 2),
+                 "mvns", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbNeg,           0x4240,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "negs", "!0C, !1C", 2),
+                 "negs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbOrr,           0x4300,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "orrs", "!0C, !1C", 2),
+                 "orrs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbPop,           0xbc00,
                  kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
-                 | IS_LOAD, "pop", "<!0R>", 2),
+                 | IS_LOAD, "pop", "<!0R>", 2, kFixupNone),
     ENCODING_MAP(kThumbPush,          0xb400,
                  kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
-                 | IS_STORE, "push", "<!0R>", 2),
+                 | IS_STORE, "push", "<!0R>", 2, kFixupNone),
     ENCODING_MAP(kThumbRorRR,        0x41c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "rors", "!0C, !1C", 2),
+                 "rors", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbSbc,           0x4180,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | USES_CCODES | SETS_CCODES,
-                 "sbcs", "!0C, !1C", 2),
+                 "sbcs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbStmia,         0xc000,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1 | IS_STORE,
-                 "stmia", "!0C!!, <!1R>", 2),
+                 "stmia", "!0C!!, <!1R>", 2, kFixupNone),
     ENCODING_MAP(kThumbStrRRI5,      0x6000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "str", "!0C, [!1C, #!2E]", 2),
+                 "str", "!0C, [!1C, #!2E]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrRRR,       0x5000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
-                 "str", "!0C, [!1C, !2C]", 2),
+                 "str", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrSpRel,    0x9000,
-                 kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0,
+                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP
-                 | IS_STORE, "str", "!0C, [sp, #!2E]", 2),
+                 | IS_STORE, "str", "!0C, [sp, #!2E]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrbRRI5,     0x7000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "strb", "!0C, [!1C, #!2d]", 2),
+                 "strb", "!0C, [!1C, #!2d]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrbRRR,      0x5400,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
-                 "strb", "!0C, [!1C, !2C]", 2),
+                 "strb", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrhRRI5,     0x8000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "strh", "!0C, [!1C, #!2F]", 2),
+                 "strh", "!0C, [!1C, #!2F]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrhRRR,      0x5200,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
-                 "strh", "!0C, [!1C, !2C]", 2),
+                 "strh", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbSubRRI3,      0x1e00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "subs", "!0C, !1C, #!2d", 2),
+                 "subs", "!0C, !1C, #!2d", 2, kFixupNone),
     ENCODING_MAP(kThumbSubRI8,       0x3800,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
-                 "subs", "!0C, #!1d", 2),
+                 "subs", "!0C, #!1d", 2, kFixupNone),
     ENCODING_MAP(kThumbSubRRR,       0x1a00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "subs", "!0C, !1C, !2C", 2),
+                 "subs", "!0C, !1C, !2C", 2, kFixupNone),
     ENCODING_MAP(kThumbSubSpI7,      0xb080,
                  kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP,
-                 "sub", "sp, #!0d*4", 2),
+                 "sub", "sp, #!0d*4", 2, kFixupNone),
     ENCODING_MAP(kThumbSwi,           0xdf00,
                  kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
-                 "swi", "!0d", 2),
+                 "swi", "!0d", 2, kFixupNone),
     ENCODING_MAP(kThumbTst,           0x4200,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE01 | SETS_CCODES,
-                 "tst", "!0C, !1C", 2),
+                 "tst", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumb2Vldrs,       0xed900a00,
                  kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD |
-                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4),
+                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4, kFixupVLoad),
     ENCODING_MAP(kThumb2Vldrd,       0xed900b00,
                  kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD |
-                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4),
+                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4, kFixupVLoad),
     ENCODING_MAP(kThumb2Vmuls,        0xee200a00,
                  kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vmuls", "!0s, !1s, !2s", 4),
+                 "vmuls", "!0s, !1s, !2s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vmuld,        0xee200b00,
                  kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vmuld", "!0S, !1S, !2S", 4),
+                 "vmuld", "!0S, !1S, !2S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vstrs,       0xed800a00,
                  kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "vstr", "!0s, [!1C, #!2E]", 4),
+                 "vstr", "!0s, [!1C, #!2E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vstrd,       0xed800b00,
                  kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "vstr", "!0S, [!1C, #!2E]", 4),
+                 "vstr", "!0S, [!1C, #!2E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vsubs,        0xee300a40,
                  kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vsub", "!0s, !1s, !2s", 4),
+                 "vsub", "!0s, !1s, !2s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vsubd,        0xee300b40,
                  kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vsub", "!0S, !1S, !2S", 4),
+                 "vsub", "!0S, !1S, !2S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vadds,        0xee300a00,
                  kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vadd", "!0s, !1s, !2s", 4),
+                 "vadd", "!0s, !1s, !2s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vaddd,        0xee300b00,
                  kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vadd", "!0S, !1S, !2S", 4),
+                 "vadd", "!0S, !1S, !2S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vdivs,        0xee800a00,
                  kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vdivs", "!0s, !1s, !2s", 4),
+                 "vdivs", "!0s, !1s, !2s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vdivd,        0xee800b00,
                  kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vdivd", "!0S, !1S, !2S", 4),
+                 "vdivd", "!0S, !1S, !2S", 4, kFixupNone),
     ENCODING_MAP(kThumb2VcvtIF,       0xeeb80ac0,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f32", "!0s, !1s", 4),
+                 "vcvt.f32", "!0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2VcvtID,       0xeeb80bc0,
                  kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f64", "!0S, !1s", 4),
+                 "vcvt.f64", "!0S, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2VcvtFI,       0xeebd0ac0,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.s32.f32 ", "!0s, !1s", 4),
+                 "vcvt.s32.f32 ", "!0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2VcvtDI,       0xeebd0bc0,
                  kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.s32.f64 ", "!0s, !1S", 4),
+                 "vcvt.s32.f64 ", "!0s, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2VcvtFd,       0xeeb70ac0,
                  kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f64.f32 ", "!0S, !1s", 4),
+                 "vcvt.f64.f32 ", "!0S, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2VcvtDF,       0xeeb70bc0,
                  kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f32.f64 ", "!0s, !1S", 4),
+                 "vcvt.f32.f64 ", "!0s, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vsqrts,       0xeeb10ac0,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vsqrt.f32 ", "!0s, !1s", 4),
+                 "vsqrt.f32 ", "!0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vsqrtd,       0xeeb10bc0,
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vsqrt.f64 ", "!0S, !1S", 4),
+                 "vsqrt.f64 ", "!0S, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2MovImmShift, 0xf04f0000, /* no setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "mov", "!0C, #!1m", 4),
+                 "mov", "!0C, #!1m", 4, kFixupNone),
     ENCODING_MAP(kThumb2MovImm16,       0xf2400000,
                  kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "mov", "!0C, #!1M", 4),
+                 "mov", "!0C, #!1M", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrRRI12,       0xf8c00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "str", "!0C, [!1C, #!2d]", 4),
+                 "str", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrRRI12,       0xf8d00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldr", "!0C, [!1C, #!2d]", 4),
+                 "ldr", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrRRI8Predec,       0xf8400c00,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "str", "!0C, [!1C, #-!2d]", 4),
+                 "str", "!0C, [!1C, #-!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrRRI8Predec,       0xf8500c00,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldr", "!0C, [!1C, #-!2d]", 4),
+                 "ldr", "!0C, [!1C, #-!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Cbnz,       0xb900, /* Note: does not affect flags */
                  kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH |
-                 NEEDS_FIXUP, "cbnz", "!0C,!1t", 2),
+                 NEEDS_FIXUP, "cbnz", "!0C,!1t", 2, kFixupCBxZ),
     ENCODING_MAP(kThumb2Cbz,       0xb100, /* Note: does not affect flags */
                  kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH |
-                 NEEDS_FIXUP, "cbz", "!0C,!1t", 2),
+                 NEEDS_FIXUP, "cbz", "!0C,!1t", 2, kFixupCBxZ),
     ENCODING_MAP(kThumb2AddRRI12,       0xf2000000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
-                 "add", "!0C,!1C,#!2d", 4),
+                 "add", "!0C,!1C,#!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2MovRR,       0xea4f0000, /* no setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mov", "!0C, !1C", 4),
+                 "mov", "!0C, !1C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vmovs,       0xeeb00a40,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vmov.f32 ", " !0s, !1s", 4),
+                 "vmov.f32 ", " !0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vmovd,       0xeeb00b40,
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vmov.f64 ", " !0S, !1S", 4),
+                 "vmov.f64 ", " !0S, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Ldmia,         0xe8900000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
-                 "ldmia", "!0C!!, <!1R>", 4),
+                 "ldmia", "!0C!!, <!1R>", 4, kFixupNone),
     ENCODING_MAP(kThumb2Stmia,         0xe8800000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1 | IS_STORE,
-                 "stmia", "!0C!!, <!1R>", 4),
+                 "stmia", "!0C!!, <!1R>", 4, kFixupNone),
     ENCODING_MAP(kThumb2AddRRR,  0xeb100000, /* setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1,
                  IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "adds", "!0C, !1C, !2C!3H", 4),
+                 "adds", "!0C, !1C, !2C!3H", 4, kFixupNone),
    ENCODING_MAP(kThumb2SubRRR,       0xebb00000, /* setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1,
                  IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "subs", "!0C, !1C, !2C!3H", 4),
+                 "subs", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2SbcRRR,       0xeb700000, /* setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1,
                  IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES | SETS_CCODES,
-                 "sbcs", "!0C, !1C, !2C!3H", 4),
+                 "sbcs", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2CmpRR,       0xebb00f00,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 4),
+                 "cmp", "!0C, !1C", 4, kFixupNone),
     ENCODING_MAP(kThumb2SubRRI12,       0xf2a00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
-                 "sub", "!0C,!1C,#!2d", 4),
+                 "sub", "!0C,!1C,#!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2MvnImm12,  0xf06f0000, /* no setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "mvn", "!0C, #!1n", 4),
+                 "mvn", "!0C, #!1n", 4, kFixupNone),
     ENCODING_MAP(kThumb2Sel,       0xfaa0f080,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES,
-                 "sel", "!0C, !1C, !2C", 4),
+                 "sel", "!0C, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Ubfx,       0xf3c00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1,
                  kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
-                 "ubfx", "!0C, !1C, #!2d, #!3d", 4),
+                 "ubfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone),
     ENCODING_MAP(kThumb2Sbfx,       0xf3400000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1,
                  kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
-                 "sbfx", "!0C, !1C, #!2d, #!3d", 4),
+                 "sbfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrRRR,    0xf8500000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrhRRR,    0xf8300000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrshRRR,    0xf9300000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrbRRR,    0xf8100000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrsbRRR,    0xf9100000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrRRR,    0xf8400000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
-                 "str", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "str", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrhRRR,    0xf8200000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
-                 "strh", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "strh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrbRRR,    0xf8000000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
-                 "strb", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "strb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrhRRI12,       0xf8b00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrh", "!0C, [!1C, #!2d]", 4),
+                 "ldrh", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrshRRI12,       0xf9b00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrsh", "!0C, [!1C, #!2d]", 4),
+                 "ldrsh", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrbRRI12,       0xf8900000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrb", "!0C, [!1C, #!2d]", 4),
+                 "ldrb", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrsbRRI12,       0xf9900000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrsb", "!0C, [!1C, #!2d]", 4),
+                 "ldrsb", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrhRRI12,       0xf8a00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "strh", "!0C, [!1C, #!2d]", 4),
+                 "strh", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrbRRI12,       0xf8800000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "strb", "!0C, [!1C, #!2d]", 4),
+                 "strb", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Pop,           0xe8bd0000,
                  kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
-                 | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4),
+                 | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4, kFixupPushPop),
     ENCODING_MAP(kThumb2Push,          0xe92d0000,
                  kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
-                 | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4),
+                 | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4, kFixupPushPop),
     ENCODING_MAP(kThumb2CmpRI12, 0xf1b00f00,
                  kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_USE0 | SETS_CCODES,
-                 "cmp", "!0C, #!1m", 4),
+                 "cmp", "!0C, #!1m", 4, kFixupNone),
     ENCODING_MAP(kThumb2AdcRRR,  0xeb500000, /* setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1,
                  IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "adcs", "!0C, !1C, !2C!3H", 4),
+                 "adcs", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2AndRRR,  0xea000000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "and", "!0C, !1C, !2C!3H", 4),
+                 "and", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2BicRRR,  0xea200000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "bic", "!0C, !1C, !2C!3H", 4),
+                 "bic", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2CmnRR,  0xeb000000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "cmn", "!0C, !1C, shift !2d", 4),
+                 "cmn", "!0C, !1C, shift !2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2EorRRR,  0xea800000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "eor", "!0C, !1C, !2C!3H", 4),
+                 "eor", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2MulRRR,  0xfb00f000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "mul", "!0C, !1C, !2C", 4),
+                 "mul", "!0C, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2MnvRR,  0xea6f0000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "mvn", "!0C, !1C, shift !2d", 4),
+                 "mvn", "!0C, !1C, shift !2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2RsubRRI8,       0xf1d00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "rsb", "!0C,!1C,#!2m", 4),
+                 "rsb", "!0C,!1C,#!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2NegRR,       0xf1d00000, /* instance of rsub */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "neg", "!0C,!1C", 4),
+                 "neg", "!0C,!1C", 4, kFixupNone),
     ENCODING_MAP(kThumb2OrrRRR,  0xea400000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "orr", "!0C, !1C, !2C!3H", 4),
+                 "orr", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2TstRR,       0xea100f00,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "tst", "!0C, !1C, shift !2d", 4),
+                 "tst", "!0C, !1C, shift !2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2LslRRR,  0xfa00f000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "lsl", "!0C, !1C, !2C", 4),
+                 "lsl", "!0C, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2LsrRRR,  0xfa20f000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "lsr", "!0C, !1C, !2C", 4),
+                 "lsr", "!0C, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2AsrRRR,  0xfa40f000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "asr", "!0C, !1C, !2C", 4),
+                 "asr", "!0C, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2RorRRR,  0xfa60f000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "ror", "!0C, !1C, !2C", 4),
+                 "ror", "!0C, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2LslRRI5,  0xea4f0000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "lsl", "!0C, !1C, #!2d", 4),
+                 "lsl", "!0C, !1C, #!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2LsrRRI5,  0xea4f0010,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "lsr", "!0C, !1C, #!2d", 4),
+                 "lsr", "!0C, !1C, #!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2AsrRRI5,  0xea4f0020,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "asr", "!0C, !1C, #!2d", 4),
+                 "asr", "!0C, !1C, #!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2RorRRI5,  0xea4f0030,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "ror", "!0C, !1C, #!2d", 4),
+                 "ror", "!0C, !1C, #!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2BicRRI8,  0xf0200000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "bic", "!0C, !1C, #!2m", 4),
+                 "bic", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2AndRRI8,  0xf0000000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "and", "!0C, !1C, #!2m", 4),
+                 "and", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2OrrRRI8,  0xf0400000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "orr", "!0C, !1C, #!2m", 4),
+                 "orr", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2EorRRI8,  0xf0800000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "eor", "!0C, !1C, #!2m", 4),
+                 "eor", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2AddRRI8,  0xf1100000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "adds", "!0C, !1C, #!2m", 4),
+                 "adds", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2AdcRRI8,  0xf1500000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
-                 "adcs", "!0C, !1C, #!2m", 4),
+                 "adcs", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2SubRRI8,  0xf1b00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "subs", "!0C, !1C, #!2m", 4),
+                 "subs", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2SbcRRI8,  0xf1700000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
-                 "sbcs", "!0C, !1C, #!2m", 4),
+                 "sbcs", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2It,  0xbf00,
                  kFmtBitBlt, 7, 4, kFmtBitBlt, 3, 0, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | IS_IT | USES_CCODES,
-                 "it:!1b", "!0c", 2),
+                 "it:!1b", "!0c", 2, kFixupNone),
     ENCODING_MAP(kThumb2Fmstat,  0xeef1fa10,
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES,
-                 "fmstat", "", 4),
+                 "fmstat", "", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vcmpd,        0xeeb40b40,
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01,
-                 "vcmp.f64", "!0S, !1S", 4),
+                 "vcmp.f64", "!0S, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vcmps,        0xeeb40a40,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01,
-                 "vcmp.f32", "!0s, !1s", 4),
+                 "vcmp.f32", "!0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrPcRel12,       0xf8df0000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
-                 "ldr", "!0C, [r15pc, #!1d]", 4),
+                 "ldr", "!0C, [r15pc, #!1d]", 4, kFixupLoad),
     ENCODING_MAP(kThumb2BCond,        0xf0008000,
                  kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | IS_BRANCH | USES_CCODES | NEEDS_FIXUP,
-                 "b!1c", "!0t", 4),
+                 "b!1c", "!0t", 4, kFixupCondBranch),
     ENCODING_MAP(kThumb2Vmovd_RR,       0xeeb00b40,
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vmov.f64", "!0S, !1S", 4),
+                 "vmov.f64", "!0S, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vmovs_RR,       0xeeb00a40,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vmov.f32", "!0s, !1s", 4),
+                 "vmov.f32", "!0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Fmrs,       0xee100a10,
                  kFmtBitBlt, 15, 12, kFmtSfp, 7, 16, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fmrs", "!0C, !1s", 4),
+                 "fmrs", "!0C, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Fmsr,       0xee000a10,
                  kFmtSfp, 7, 16, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fmsr", "!0s, !1C", 4),
+                 "fmsr", "!0s, !1C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Fmrrd,       0xec500b10,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtDfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2,
-                 "fmrrd", "!0C, !1C, !2S", 4),
+                 "fmrrd", "!0C, !1C, !2S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Fmdrr,       0xec400b10,
                  kFmtDfp, 5, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "fmdrr", "!0S, !1C, !2C", 4),
+                 "fmdrr", "!0S, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vabsd,       0xeeb00bc0,
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vabs.f64", "!0S, !1S", 4),
+                 "vabs.f64", "!0S, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vabss,       0xeeb00ac0,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vabs.f32", "!0s, !1s", 4),
+                 "vabs.f32", "!0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vnegd,       0xeeb10b40,
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vneg.f64", "!0S, !1S", 4),
+                 "vneg.f64", "!0S, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vnegs,       0xeeb10a40,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vneg.f32", "!0s, !1s", 4),
+                 "vneg.f32", "!0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vmovs_IMM8,       0xeeb00a00,
                  kFmtSfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "vmov.f32", "!0s, #0x!1h", 4),
+                 "vmov.f32", "!0s, #0x!1h", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vmovd_IMM8,       0xeeb00b00,
                  kFmtDfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "vmov.f64", "!0S, #0x!1h", 4),
+                 "vmov.f64", "!0S, #0x!1h", 4, kFixupNone),
     ENCODING_MAP(kThumb2Mla,  0xfb000000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 15, 12,
                  IS_QUAD_OP | REG_DEF0 | REG_USE1 | REG_USE2 | REG_USE3,
-                 "mla", "!0C, !1C, !2C, !3C", 4),
+                 "mla", "!0C, !1C, !2C, !3C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Umull,  0xfba00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
                  kFmtBitBlt, 3, 0,
                  IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
-                 "umull", "!0C, !1C, !2C, !3C", 4),
+                 "umull", "!0C, !1C, !2C, !3C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Ldrex,       0xe8500f00,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrex", "!0C, [!1C, #!2E]", 4),
+                 "ldrex", "!0C, [!1C, #!2E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Strex,       0xe8400000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
                  kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE,
-                 "strex", "!0C,!1C, [!2C, #!2E]", 4),
+                 "strex", "!0C,!1C, [!2C, #!2E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Clrex,       0xf3bf8f2f,
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND,
-                 "clrex", "", 4),
+                 "clrex", "", 4, kFixupNone),
     ENCODING_MAP(kThumb2Bfi,         0xf3600000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtShift5, -1, -1,
                  kFmtBitBlt, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
-                 "bfi", "!0C,!1C,#!2d,#!3d", 4),
+                 "bfi", "!0C,!1C,#!2d,#!3d", 4, kFixupNone),
     ENCODING_MAP(kThumb2Bfc,         0xf36f0000,
                  kFmtBitBlt, 11, 8, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
-                 "bfc", "!0C,#!1d,#!2d", 4),
+                 "bfc", "!0C,#!1d,#!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2Dmb,         0xf3bf8f50,
                  kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP,
-                 "dmb", "#!0B", 4),
+                 "dmb", "#!0B", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrPcReln12,       0xf85f0000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD,
-                 "ldr", "!0C, [r15pc, -#!1d]", 4),
+                 "ldr", "!0C, [r15pc, -#!1d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Stm,          0xe9000000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 12, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_USE0 | REG_USE_LIST1 | IS_STORE,
-                 "stm", "!0C, <!1R>", 4),
+                 "stm", "!0C, <!1R>", 4, kFixupNone),
     ENCODING_MAP(kThumbUndefined,       0xde00,
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND,
-                 "undefined", "", 2),
+                 "undefined", "", 2, kFixupNone),
     // NOTE: vpop, vpush hard-encoded for s16+ reg list
     ENCODING_MAP(kThumb2VPopCS,       0xecbd8a00,
                  kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_FPCS_LIST0
-                 | IS_LOAD, "vpop", "<!0P>", 4),
+                 | IS_LOAD, "vpop", "<!0P>", 4, kFixupNone),
     ENCODING_MAP(kThumb2VPushCS,      0xed2d8a00,
                  kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_FPCS_LIST0
-                 | IS_STORE, "vpush", "<!0P>", 4),
+                 | IS_STORE, "vpush", "<!0P>", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vldms,        0xec900a00,
                  kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_USE0 | REG_DEF_FPCS_LIST2
-                 | IS_LOAD, "vldms", "!0C, <!2Q>", 4),
+                 | IS_LOAD, "vldms", "!0C, <!2Q>", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vstms,        0xec800a00,
                  kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_USE0 | REG_USE_FPCS_LIST2
-                 | IS_STORE, "vstms", "!0C, <!2Q>", 4),
+                 | IS_STORE, "vstms", "!0C, <!2Q>", 4, kFixupNone),
     ENCODING_MAP(kThumb2BUncond,      0xf0009000,
                  kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
-                 "b", "!0t", 4),
+                 "b", "!0t", 4, kFixupT2Branch),
     ENCODING_MAP(kThumb2MovImm16H,       0xf2c00000,
                  kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0,
-                 "movt", "!0C, #!1M", 4),
+                 "movt", "!0C, #!1M", 4, kFixupNone),
     ENCODING_MAP(kThumb2AddPCR,      0x4487,
                  kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_USE0 | IS_BRANCH,
-                 "add", "rPC, !0C", 2),
+                 IS_UNARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
+                 "add", "rPC, !0C", 2, kFixupLabel),
     ENCODING_MAP(kThumb2Adr,         0xf20f0000,
                  kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  /* Note: doesn't affect flags */
                  IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP,
-                 "adr", "!0C,#!1d", 4),
+                 "adr", "!0C,#!1d", 4, kFixupAdr),
     ENCODING_MAP(kThumb2MovImm16LST,     0xf2400000,
                  kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP,
-                 "mov", "!0C, #!1M", 4),
+                 "mov", "!0C, #!1M", 4, kFixupMovImmLST),
     ENCODING_MAP(kThumb2MovImm16HST,     0xf2c00000,
                  kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP,
-                 "movt", "!0C, #!1M", 4),
+                 "movt", "!0C, #!1M", 4, kFixupMovImmHST),
     ENCODING_MAP(kThumb2LdmiaWB,         0xe8b00000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
-                 "ldmia", "!0C!!, <!1R>", 4),
+                 "ldmia", "!0C!!, <!1R>", 4, kFixupNone),
     ENCODING_MAP(kThumb2SubsRRI12,       0xf1b00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "subs", "!0C,!1C,#!2d", 4),
+                 "subs", "!0C,!1C,#!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2OrrRRRs,  0xea500000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "orrs", "!0C, !1C, !2C!3H", 4),
+                 "orrs", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2Push1,    0xf84d0d04,
                  kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE0
-                 | IS_STORE, "push1", "!0C", 4),
+                 | IS_STORE, "push1", "!0C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Pop1,    0xf85d0b04,
                  kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF0
-                 | IS_LOAD, "pop1", "!0C", 4),
+                 | IS_LOAD, "pop1", "!0C", 4, kFixupNone),
     ENCODING_MAP(kThumb2RsubRRR,  0xebd00000, /* setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1,
                  IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "rsbs", "!0C, !1C, !2C!3H", 4),
+                 "rsbs", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2Smull,  0xfb800000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
                  kFmtBitBlt, 3, 0,
                  IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
-                 "smull", "!0C, !1C, !2C, !3C", 4),
+                 "smull", "!0C, !1C, !2C, !3C", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrdPcRel8,  0xe9df0000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
-                 "ldrd", "!0C, !1C, [pc, #!2E]", 4),
+                 "ldrd", "!0C, !1C, [pc, #!2E]", 4, kFixupLoad),
     ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
                  kFmtBitBlt, 7, 0,
                  IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD,
-                 "ldrd", "!0C, !1C, [!2C, #!3E]", 4),
+                 "ldrd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrdI8, 0xe9c00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
                  kFmtBitBlt, 7, 0,
                  IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE,
-                 "strd", "!0C, !1C, [!2C, #!3E]", 4),
+                 "strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
 };
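
The table above is consumed by a bit-blitting encoder: each kFmtBitBlt triple names the inclusive high and low bits of the field that receives the corresponding operand, which is masked and or'd into the fixed skeleton word. A minimal standalone sketch of that scheme (simplified, hypothetical types — not the ART implementation):

#include <cstdint>
#include <cstdio>

// Hypothetical, simplified descriptor for a kFmtBitBlt operand field:
// the operand value is masked to fit bits [end..start] and or'd into
// the instruction skeleton.
struct BitBltField {
  int end;    // high bit, inclusive
  int start;  // low bit, inclusive
};

uint32_t InsertOperand(uint32_t skeleton, BitBltField f, uint32_t value) {
  uint32_t width = f.end - f.start + 1;
  uint32_t mask = (width >= 32) ? ~0u : ((1u << width) - 1u);
  return skeleton | ((value & mask) << f.start);
}

int main() {
  // kThumbLdrhRRI5 above: skeleton 0x8800, rd in bits 2..0, rn in
  // bits 5..3, imm5 (halfword offset) in bits 10..6.
  // Encode "ldrh r1, [r2, #6]" (byte offset 6 -> imm5 = 3).
  uint32_t insn = 0x8800;
  insn = InsertOperand(insn, {2, 0}, 1);   // rd = r1
  insn = InsertOperand(insn, {5, 3}, 2);   // rn = r2
  insn = InsertOperand(insn, {10, 6}, 3);  // imm5 = 3
  printf("0x%04x\n", insn);                // prints 0x88d1
  return 0;
}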
 
+// new_lir replaces orig_lir in the pcrel_fixup list.
+void ArmMir2Lir::ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
+  new_lir->u.a.pcrel_next = orig_lir->u.a.pcrel_next;
+  if (UNLIKELY(prev_lir == NULL)) {
+    first_fixup_ = new_lir;
+  } else {
+    prev_lir->u.a.pcrel_next = new_lir;
+  }
+  orig_lir->flags.fixup = kFixupNone;
+}
+
+// new_lir is inserted before orig_lir in the pcrel_fixup list.
+void ArmMir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
+  new_lir->u.a.pcrel_next = orig_lir;
+  if (UNLIKELY(prev_lir == NULL)) {
+    first_fixup_ = new_lir;
+  } else {
+    DCHECK(prev_lir->u.a.pcrel_next == orig_lir);
+    prev_lir->u.a.pcrel_next = new_lir;
+  }
+}
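
Both helpers splice a singly linked list threaded through the LIR nodes via u.a.pcrel_next, with first_fixup_ as the list head. A self-contained sketch of the same splicing logic, using a hypothetical Node type in place of LIR:

#include <cassert>
#include <cstddef>

// Hypothetical stand-in for LIR with its pcrel_next link.
struct Node {
  Node* pcrel_next = nullptr;
};

Node* first_fixup = nullptr;  // list head, as first_fixup_ above

// Mirrors ReplaceFixup(): new_node takes orig's place in the chain.
void Replace(Node* prev, Node* orig, Node* new_node) {
  new_node->pcrel_next = orig->pcrel_next;
  if (prev == nullptr) {
    first_fixup = new_node;   // orig was the head
  } else {
    prev->pcrel_next = new_node;
  }
}

// Mirrors InsertFixupBefore(): new_node is linked in ahead of orig.
void InsertBefore(Node* prev, Node* orig, Node* new_node) {
  new_node->pcrel_next = orig;
  if (prev == nullptr) {
    first_fixup = new_node;   // orig was the head
  } else {
    assert(prev->pcrel_next == orig);
    prev->pcrel_next = new_node;
  }
}

int main() {
  Node a, b, repl, front;
  first_fixup = &a;
  a.pcrel_next = &b;
  Replace(&a, &b, &repl);             // list is now a -> repl
  InsertBefore(nullptr, &a, &front);  // list is now front -> a -> repl
  assert(first_fixup == &front);
  return 0;
}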
+
 /*
  * The fake NOP of moving r0 to r0 actually will incur data stalls if r0 is
  * not ready. Since r5FP is not updated often, it is less likely to
@@ -997,284 +1019,16 @@
  */
 #define PADDING_MOV_R5_R5               0x1C2D
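
With encoding now cached per LIR, the padding halfword is stored into the instruction's byte buffer (see EncodeLIR below) rather than pushed straight onto the code buffer, but either way it is emitted low byte first. A minimal sketch of that little-endian emission, assuming a plain byte-vector code buffer:

#include <cstdint>
#include <vector>

// Sketch: append the 16-bit Thumb padding instruction (mov r5, r5,
// i.e. PADDING_MOV_R5_R5 = 0x1c2d) to a little-endian code buffer.
void EmitPadding(std::vector<uint8_t>& code_buffer) {
  const uint16_t kPadding = 0x1c2d;
  code_buffer.push_back(kPadding & 0xff);         // low byte first
  code_buffer.push_back((kPadding >> 8) & 0xff);  // then high byte
}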
 
-/*
- * Assemble the LIR into binary instruction format.  Note that we may
- * discover that pc-relative displacements may not fit the selected
- * instruction.
- */
-AssemblerStatus ArmMir2Lir::AssembleInstructions(uintptr_t start_addr) {
-  LIR* lir;
-  AssemblerStatus res = kSuccess;  // Assume success
-
-  for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) {
-    if (lir->opcode < 0) {
-      /* 1 means padding is needed */
-      if ((lir->opcode == kPseudoPseudoAlign4) && (lir->operands[0] == 1)) {
-        code_buffer_.push_back(PADDING_MOV_R5_R5 & 0xFF);
-        code_buffer_.push_back((PADDING_MOV_R5_R5 >> 8) & 0xFF);
-      }
-      continue;
+void ArmMir2Lir::EncodeLIR(LIR* lir) {
+  int opcode = lir->opcode;
+  if (IsPseudoLirOp(opcode)) {
+    if (UNLIKELY(opcode == kPseudoPseudoAlign4)) {
+      // Note: size for this opcode will be either 0 or 2 depending on final alignment.
+      lir->u.a.bytes[0] = (PADDING_MOV_R5_R5 & 0xff);
+      lir->u.a.bytes[1] = ((PADDING_MOV_R5_R5 >> 8) & 0xff);
+      lir->flags.size = (lir->offset & 0x2);
     }
-
-    if (lir->flags.is_nop) {
-      continue;
-    }
-
-    /*
-     * For PC-relative displacements we won't know if the
-     * selected instruction will work until late (i.e. - now).
-     * If something doesn't fit, we must replace the short-form
-     * operation with a longer-form one.  Note, though, that this
-     * can change code we've already processed, so we'll need to
-     * re-calculate offsets and restart.  To limit the number of
-     * restarts, the entire list will be scanned and patched.
-     * Of course, the patching itself may cause new overflows so this
-     * is an iterative process.
-     */
-    if (lir->flags.pcRelFixup) {
-      if (lir->opcode == kThumbLdrPcRel ||
-          lir->opcode == kThumb2LdrPcRel12 ||
-          lir->opcode == kThumbAddPcRel ||
-          lir->opcode == kThumb2LdrdPcRel8 ||
-          ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) ||
-          ((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) {
-        /*
-         * PC-relative loads are mostly used to load immediates
-         * that are too large to materialize directly in one shot.
-         * However, if the load displacement exceeds the limit,
-         * we revert to a multiple-instruction materialization sequence.
-         */
-        LIR *lir_target = lir->target;
-        uintptr_t pc = (lir->offset + 4) & ~3;
-        uintptr_t target = lir_target->offset;
-        int delta = target - pc;
-        if (delta & 0x3) {
-          LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta;
-        }
-        // First, a sanity check for cases we shouldn't see now
-        if (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) ||
-            ((lir->opcode == kThumbLdrPcRel) && (delta > 1020))) {
-          // Shouldn't happen in current codegen.
-          LOG(FATAL) << "Unexpected pc-rel offset " << delta;
-        }
-        // Now, check for the difficult cases
-        if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
-            ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) ||
-            ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
-            ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
-          /*
-           * Note: because rARM_LR may be used to fix up out-of-range
-           * vldrs/vldrd we include REG_DEF_LR in the resource
-           * masks for these instructions.
-           */
-          int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) || (lir->opcode == kThumb2LdrPcRel12))
-              ?  lir->operands[0] : rARM_LR;
-
-          // Add new Adr to generate the address.
-          LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr,
-                     base_reg, 0, 0, 0, 0, lir->target);
-          InsertLIRBefore(lir, new_adr);
-
-          // Convert to normal load.
-          if (lir->opcode == kThumb2LdrPcRel12) {
-            lir->opcode = kThumb2LdrRRI12;
-          } else if (lir->opcode == kThumb2LdrdPcRel8) {
-            lir->opcode = kThumb2LdrdI8;
-          }
-          // Change the load to be relative to the new Adr base.
-          if (lir->opcode == kThumb2LdrdI8) {
-            lir->operands[3] = 0;
-            lir->operands[2] = base_reg;
-          } else {
-            lir->operands[2] = 0;
-            lir->operands[1] = base_reg;
-          }
-          SetupResourceMasks(lir);
-          res = kRetryAll;
-        } else {
-          if ((lir->opcode == kThumb2Vldrs) ||
-              (lir->opcode == kThumb2Vldrd) ||
-              (lir->opcode == kThumb2LdrdPcRel8)) {
-            lir->operands[2] = delta >> 2;
-          } else {
-            lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?  delta :
-                delta >> 2;
-          }
-        }
-      } else if (lir->opcode == kThumb2Cbnz || lir->opcode == kThumb2Cbz) {
-        LIR *target_lir = lir->target;
-        uintptr_t pc = lir->offset + 4;
-        uintptr_t target = target_lir->offset;
-        int delta = target - pc;
-        if (delta > 126 || delta < 0) {
-          /*
-           * Convert to cmp rx,#0 / b[eq/ne] tgt pair
-           * Make new branch instruction and insert after
-           */
-          LIR* new_inst =
-            RawLIR(lir->dalvik_offset, kThumbBCond, 0,
-                   (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe,
-                   0, 0, 0, lir->target);
-          InsertLIRAfter(lir, new_inst);
-          /* Convert the cb[n]z to a cmp rx, #0 ] */
-          lir->opcode = kThumbCmpRI8;
-          /* operand[0] is src1 in both cb[n]z & CmpRI8 */
-          lir->operands[1] = 0;
-          lir->target = 0;
-          SetupResourceMasks(lir);
-          res = kRetryAll;
-        } else {
-          lir->operands[1] = delta >> 1;
-        }
-      } else if (lir->opcode == kThumb2Push || lir->opcode == kThumb2Pop) {
-        if (__builtin_popcount(lir->operands[0]) == 1) {
-          /*
-           * The standard push/pop multiple instruction
-           * requires at least two registers in the list.
-           * If we've got just one, switch to the single-reg
-           * encoding.
-           */
-          lir->opcode = (lir->opcode == kThumb2Push) ? kThumb2Push1 :
-              kThumb2Pop1;
-          int reg = 0;
-          while (lir->operands[0]) {
-            if (lir->operands[0] & 0x1) {
-              break;
-            } else {
-              reg++;
-              lir->operands[0] >>= 1;
-            }
-          }
-          lir->operands[0] = reg;
-          SetupResourceMasks(lir);
-          res = kRetryAll;
-        }
-      } else if (lir->opcode == kThumbBCond || lir->opcode == kThumb2BCond) {
-        LIR *target_lir = lir->target;
-        int delta = 0;
-        DCHECK(target_lir);
-        uintptr_t pc = lir->offset + 4;
-        uintptr_t target = target_lir->offset;
-        delta = target - pc;
-        if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) {
-          lir->opcode = kThumb2BCond;
-          SetupResourceMasks(lir);
-          res = kRetryAll;
-        }
-        lir->operands[0] = delta >> 1;
-      } else if (lir->opcode == kThumb2BUncond) {
-        LIR *target_lir = lir->target;
-        uintptr_t pc = lir->offset + 4;
-        uintptr_t target = target_lir->offset;
-        int delta = target - pc;
-        lir->operands[0] = delta >> 1;
-        if (!(cu_->disable_opt & (1 << kSafeOptimizations)) &&
-          lir->operands[0] == 0) {  // Useless branch
-          lir->flags.is_nop = true;
-          res = kRetryAll;
-        }
-      } else if (lir->opcode == kThumbBUncond) {
-        LIR *target_lir = lir->target;
-        uintptr_t pc = lir->offset + 4;
-        uintptr_t target = target_lir->offset;
-        int delta = target - pc;
-        if (delta > 2046 || delta < -2048) {
-          // Convert to Thumb2BCond w/ kArmCondAl
-          lir->opcode = kThumb2BUncond;
-          lir->operands[0] = 0;
-          SetupResourceMasks(lir);
-          res = kRetryAll;
-        } else {
-          lir->operands[0] = delta >> 1;
-          if (!(cu_->disable_opt & (1 << kSafeOptimizations)) &&
-            lir->operands[0] == -1) {  // Useless branch
-            lir->flags.is_nop = true;
-            res = kRetryAll;
-          }
-        }
-      } else if (lir->opcode == kThumbBlx1) {
-        DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2);
-        /* cur_pc is Thumb */
-        uintptr_t cur_pc = (start_addr + lir->offset + 4) & ~3;
-        uintptr_t target = lir->operands[1];
-
-        /* Match bit[1] in target with base */
-        if (cur_pc & 0x2) {
-          target |= 0x2;
-        }
-        int delta = target - cur_pc;
-        DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
-
-        lir->operands[0] = (delta >> 12) & 0x7ff;
-        NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
-      } else if (lir->opcode == kThumbBl1) {
-        DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2);
-        /* Both cur_pc and target are Thumb */
-        uintptr_t cur_pc = start_addr + lir->offset + 4;
-        uintptr_t target = lir->operands[1];
-
-        int delta = target - cur_pc;
-        DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
-
-        lir->operands[0] = (delta >> 12) & 0x7ff;
-        NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
-      } else if (lir->opcode == kThumb2Adr) {
-        SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[2]);
-        LIR* target = lir->target;
-        int target_disp = tab_rec ? tab_rec->offset
-                    : target->offset;
-        int disp = target_disp - ((lir->offset + 4) & ~3);
-        if (disp < 4096) {
-          lir->operands[1] = disp;
-        } else {
-          // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0]
-          // TUNING: if this case fires often, it can be improved.  Not expected to be common.
-          LIR *new_mov16L =
-              RawLIR(lir->dalvik_offset, kThumb2MovImm16LST,
-                     lir->operands[0], 0, reinterpret_cast<uintptr_t>(lir),
-                     reinterpret_cast<uintptr_t>(tab_rec), 0, lir->target);
-          InsertLIRBefore(lir, new_mov16L);
-          LIR *new_mov16H =
-              RawLIR(lir->dalvik_offset, kThumb2MovImm16HST,
-                     lir->operands[0], 0, reinterpret_cast<uintptr_t>(lir),
-                     reinterpret_cast<uintptr_t>(tab_rec), 0, lir->target);
-          InsertLIRBefore(lir, new_mov16H);
-          if (ARM_LOWREG(lir->operands[0])) {
-            lir->opcode = kThumbAddRRLH;
-          } else {
-            lir->opcode = kThumbAddRRHH;
-          }
-          lir->operands[1] = rARM_PC;
-          SetupResourceMasks(lir);
-          res = kRetryAll;
-        }
-      } else if (lir->opcode == kThumb2MovImm16LST) {
-        // operands[1] should hold disp, [2] has add, [3] has tab_rec
-        LIR *addPCInst = reinterpret_cast<LIR*>(lir->operands[2]);
-        SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]);
-        // If tab_rec is null, this is a literal load. Use target
-        LIR* target = lir->target;
-        int target_disp = tab_rec ? tab_rec->offset : target->offset;
-        lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff;
-      } else if (lir->opcode == kThumb2MovImm16HST) {
-        // operands[1] should hold disp, [2] has add, [3] has tab_rec
-        LIR *addPCInst = reinterpret_cast<LIR*>(lir->operands[2]);
-        SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]);
-        // If tab_rec is null, this is a literal load. Use target
-        LIR* target = lir->target;
-        int target_disp = tab_rec ? tab_rec->offset : target->offset;
-        lir->operands[1] =
-            ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff;
-      }
-    }
-    /*
-     * If one of the pc-relative instructions expanded we'll have
-     * to make another pass.  Don't bother to fully assemble the
-     * instruction.
-     */
-    if (res != kSuccess) {
-      continue;
-    }
+  } else if (LIKELY(!lir->flags.is_nop)) {
     const ArmEncodingMap *encoder = &EncodingMap[lir->opcode];
     uint32_t bits = encoder->skeleton;
     int i;
@@ -1282,113 +1036,624 @@
       uint32_t operand;
       uint32_t value;
       operand = lir->operands[i];
-      switch (encoder->field_loc[i].kind) {
-        case kFmtUnused:
-          break;
-        case kFmtFPImm:
-          value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end;
-          value |= (operand & 0x0F) << encoder->field_loc[i].start;
-          bits |= value;
-          break;
-        case kFmtBrOffset:
-          value = ((operand  & 0x80000) >> 19) << 26;
-          value |= ((operand & 0x40000) >> 18) << 11;
-          value |= ((operand & 0x20000) >> 17) << 13;
-          value |= ((operand & 0x1f800) >> 11) << 16;
-          value |= (operand  & 0x007ff);
-          bits |= value;
-          break;
-        case kFmtShift5:
-          value = ((operand & 0x1c) >> 2) << 12;
-          value |= (operand & 0x03) << 6;
-          bits |= value;
-          break;
-        case kFmtShift:
-          value = ((operand & 0x70) >> 4) << 12;
-          value |= (operand & 0x0f) << 4;
-          bits |= value;
-          break;
-        case kFmtBWidth:
-          value = operand - 1;
-          bits |= value;
-          break;
-        case kFmtLsb:
-          value = ((operand & 0x1c) >> 2) << 12;
-          value |= (operand & 0x03) << 6;
-          bits |= value;
-          break;
-        case kFmtImm6:
-          value = ((operand & 0x20) >> 5) << 9;
-          value |= (operand & 0x1f) << 3;
-          bits |= value;
-          break;
-        case kFmtBitBlt:
-          value = (operand << encoder->field_loc[i].start) &
-              ((1 << (encoder->field_loc[i].end + 1)) - 1);
-          bits |= value;
-          break;
-        case kFmtDfp: {
-          DCHECK(ARM_DOUBLEREG(operand));
-          DCHECK_EQ((operand & 0x1), 0U);
-          int reg_name = (operand & ARM_FP_REG_MASK) >> 1;
-          /* Snag the 1-bit slice and position it */
-          value = ((reg_name & 0x10) >> 4) << encoder->field_loc[i].end;
-          /* Extract and position the 4-bit slice */
-          value |= (reg_name & 0x0f) << encoder->field_loc[i].start;
-          bits |= value;
-          break;
-        }
-        case kFmtSfp:
-          DCHECK(ARM_SINGLEREG(operand));
-          /* Snag the 1-bit slice and position it */
-          value = (operand & 0x1) << encoder->field_loc[i].end;
-          /* Extract and position the 4-bit slice */
-          value |= ((operand & 0x1e) >> 1) << encoder->field_loc[i].start;
-          bits |= value;
-          break;
-        case kFmtImm12:
-        case kFmtModImm:
-          value = ((operand & 0x800) >> 11) << 26;
-          value |= ((operand & 0x700) >> 8) << 12;
-          value |= operand & 0x0ff;
-          bits |= value;
-          break;
-        case kFmtImm16:
-          value = ((operand & 0x0800) >> 11) << 26;
-          value |= ((operand & 0xf000) >> 12) << 16;
-          value |= ((operand & 0x0700) >> 8) << 12;
-          value |= operand & 0x0ff;
-          bits |= value;
-          break;
-        case kFmtOff24: {
-          uint32_t signbit = (operand >> 31) & 0x1;
-          uint32_t i1 = (operand >> 22) & 0x1;
-          uint32_t i2 = (operand >> 21) & 0x1;
-          uint32_t imm10 = (operand >> 11) & 0x03ff;
-          uint32_t imm11 = operand & 0x07ff;
-          uint32_t j1 = (i1 ^ signbit) ? 0 : 1;
-          uint32_t j2 = (i2 ^ signbit) ? 0 : 1;
-          value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) |
-              imm11;
-          bits |= value;
+      ArmEncodingKind kind = encoder->field_loc[i].kind;
+      if (LIKELY(kind == kFmtBitBlt)) {
+        value = (operand << encoder->field_loc[i].start) &
+            ((1 << (encoder->field_loc[i].end + 1)) - 1);
+        bits |= value;
+      } else {
+        switch (encoder->field_loc[i].kind) {
+          case kFmtSkip:
+            break;  // Nothing to do, but continue to next.
+          case kFmtUnused:
+            i = 4;  // Done, break out of the enclosing loop.
+            break;
+          case kFmtFPImm:
+            value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end;
+            value |= (operand & 0x0F) << encoder->field_loc[i].start;
+            bits |= value;
+            break;
+          case kFmtBrOffset:
+            value = ((operand  & 0x80000) >> 19) << 26;
+            value |= ((operand & 0x40000) >> 18) << 11;
+            value |= ((operand & 0x20000) >> 17) << 13;
+            value |= ((operand & 0x1f800) >> 11) << 16;
+            value |= (operand  & 0x007ff);
+            bits |= value;
+            break;
+          case kFmtShift5:
+            value = ((operand & 0x1c) >> 2) << 12;
+            value |= (operand & 0x03) << 6;
+            bits |= value;
+            break;
+          case kFmtShift:
+            value = ((operand & 0x70) >> 4) << 12;
+            value |= (operand & 0x0f) << 4;
+            bits |= value;
+            break;
+          case kFmtBWidth:
+            value = operand - 1;
+            bits |= value;
+            break;
+          case kFmtLsb:
+            value = ((operand & 0x1c) >> 2) << 12;
+            value |= (operand & 0x03) << 6;
+            bits |= value;
+            break;
+          case kFmtImm6:
+            value = ((operand & 0x20) >> 5) << 9;
+            value |= (operand & 0x1f) << 3;
+            bits |= value;
+            break;
+          case kFmtDfp: {
+            DCHECK(ARM_DOUBLEREG(operand));
+            DCHECK_EQ((operand & 0x1), 0U);
+            int reg_name = (operand & ARM_FP_REG_MASK) >> 1;
+            /* Snag the 1-bit slice and position it */
+            value = ((reg_name & 0x10) >> 4) << encoder->field_loc[i].end;
+            /* Extract and position the 4-bit slice */
+            value |= (reg_name & 0x0f) << encoder->field_loc[i].start;
+            bits |= value;
+            break;
           }
-          break;
-        default:
-          LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind;
+          case kFmtSfp:
+            DCHECK(ARM_SINGLEREG(operand));
+            /* Snag the 1-bit slice and position it */
+            value = (operand & 0x1) << encoder->field_loc[i].end;
+            /* Extract and position the 4-bit slice */
+            value |= ((operand & 0x1e) >> 1) << encoder->field_loc[i].start;
+            bits |= value;
+            break;
+          case kFmtImm12:
+          case kFmtModImm:
+            value = ((operand & 0x800) >> 11) << 26;
+            value |= ((operand & 0x700) >> 8) << 12;
+            value |= operand & 0x0ff;
+            bits |= value;
+            break;
+          case kFmtImm16:
+            value = ((operand & 0x0800) >> 11) << 26;
+            value |= ((operand & 0xf000) >> 12) << 16;
+            value |= ((operand & 0x0700) >> 8) << 12;
+            value |= operand & 0x0ff;
+            bits |= value;
+            break;
+          case kFmtOff24: {
+            uint32_t signbit = (operand >> 31) & 0x1;
+            uint32_t i1 = (operand >> 22) & 0x1;
+            uint32_t i2 = (operand >> 21) & 0x1;
+            uint32_t imm10 = (operand >> 11) & 0x03ff;
+            uint32_t imm11 = operand & 0x07ff;
+            uint32_t j1 = (i1 ^ signbit) ? 0 : 1;
+            uint32_t j2 = (i2 ^ signbit) ? 0 : 1;
+            value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) |
+                imm11;
+            bits |= value;
+            }
+            break;
+          default:
+            LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind;
+        }
       }
     }
     if (encoder->size == 4) {
-      code_buffer_.push_back((bits >> 16) & 0xff);
-      code_buffer_.push_back((bits >> 24) & 0xff);
+      lir->u.a.bytes[0] = ((bits >> 16) & 0xff);
+      lir->u.a.bytes[1] = ((bits >> 24) & 0xff);
+      lir->u.a.bytes[2] = (bits & 0xff);
+      lir->u.a.bytes[3] = ((bits >> 8) & 0xff);
+    } else {
+      DCHECK_EQ(encoder->size, 2);
+      lir->u.a.bytes[0] = (bits & 0xff);
+      lir->u.a.bytes[1] = ((bits >> 8) & 0xff);
     }
-    code_buffer_.push_back(bits & 0xff);
-    code_buffer_.push_back((bits >> 8) & 0xff);
+    lir->flags.size = encoder->size;
   }
-  return res;
+}
+
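Most operands flow through the kFmtBitBlt case, which is why EncodeLIR hoists it ahead of the switch. A standalone sketch of that bit-blit insertion (a generic helper for illustration, not ART's code; assumes end < 31):

    #include <cstdint>

    // Shift 'operand' into position 'start' and keep only bits [end:0] of the
    // result, then OR it into the instruction skeleton (mirrors kFmtBitBlt).
    uint32_t InsertBitBlt(uint32_t skeleton, uint32_t operand, int start, int end) {
      uint32_t value = (operand << start) & ((1u << (end + 1)) - 1);
      return skeleton | value;
    }

    // Example: a 4-bit register field in bits [11:8] uses start = 8, end = 11.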
+// Assemble the LIR into binary instruction format.
+void ArmMir2Lir::AssembleLIR() {
+  LIR* lir;
+  LIR* prev_lir;
+  int assembler_retries = 0;
+  int starting_offset = EncodeRange(first_lir_insn_, last_lir_insn_, 0);
+  data_offset_ = (starting_offset + 0x3) & ~0x3;
+  int offset_adjustment;
+  AssignDataOffsets();
+
+  /*
+   * Note: generation must be 1 on the first pass (to distinguish from the initialized state of 0
+   * for non-visited nodes).  Start at zero here; the bit will be flipped to 1 on entry to the loop.
+   */
+  int generation = 0;
+  while (true) {
+    offset_adjustment = 0;
+    AssemblerStatus res = kSuccess;  // Assume success
+    generation ^= 1;
+    // Note: nodes requiring possible fixup are linked in ascending order.
+    lir = first_fixup_;
+    prev_lir = NULL;
+    while (lir != NULL) {
+      /*
+       * NOTE: the lir being considered here will be encoded following the switch (so long as
+       * we're not in a retry situation).  However, any new non-pc_rel instructions inserted
+       * due to retry must be explicitly encoded at the time of insertion.  Note that
+       * inserted instructions don't need use/def flags, but do need size and pc-rel status
+       * properly updated.
+       */
+      lir->offset += offset_adjustment;
+      // During a pass, this lets us tell whether a node has been updated with offset_adjustment yet.
+      lir->flags.generation = generation;
+      switch (static_cast<FixupKind>(lir->flags.fixup)) {
+        case kFixupLabel:
+        case kFixupNone:
+          break;
+        case kFixupVLoad:
+          if (lir->operands[1] != r15pc) {
+            break;
+          }
+          // NOTE: intentional fallthrough.
+        case kFixupLoad: {
+          /*
+           * PC-relative loads are mostly used to load immediates
+           * that are too large to materialize directly in one shot.
+           * However, if the load displacement exceeds the limit,
+           * we revert to a multiple-instruction materialization sequence.
+           */
+          LIR *lir_target = lir->target;
+          uintptr_t pc = (lir->offset + 4) & ~3;
+          uintptr_t target = lir_target->offset +
+              ((lir_target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int delta = target - pc;
+          if (res != kSuccess) {
+            /*
+             * In this case, we're just estimating and will do it again for real.  Ensure offset
+             * is legal.
+             */
+            delta &= ~0x3;
+          }
+          DCHECK_EQ((delta & 0x3), 0);
+          // First, a sanity check for cases we shouldn't see now
+          if (kIsDebugBuild && (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) ||
+              ((lir->opcode == kThumbLdrPcRel) && (delta > 1020)))) {
+            // Shouldn't happen in current codegen.
+            LOG(FATAL) << "Unexpected pc-rel offset " << delta;
+          }
+          // Now, check for the difficult cases
+          if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
+              ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) ||
+              ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
+              ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
+            /*
+             * Note: The reason vldrs/vldrd include rARM_LR in their use/def masks is that we
+             * sometimes have to use it to fix up out-of-range accesses.  This is where that
+             * happens.
+             */
+            int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) ||
+                            (lir->opcode == kThumb2LdrPcRel12)) ?  lir->operands[0] : rARM_LR;
+
+            // Add new Adr to generate the address.
+            LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr,
+                       base_reg, 0, 0, 0, 0, lir->target);
+            new_adr->offset = lir->offset;
+            new_adr->flags.fixup = kFixupAdr;
+            new_adr->flags.size = EncodingMap[kThumb2Adr].size;
+            InsertLIRBefore(lir, new_adr);
+            lir->offset += new_adr->flags.size;
+            offset_adjustment += new_adr->flags.size;
+
+            // lir is no longer pc-relative; unlink it and link in new_adr.
+            ReplaceFixup(prev_lir, lir, new_adr);
+
+            // Convert to normal load.
+            offset_adjustment -= lir->flags.size;
+            if (lir->opcode == kThumb2LdrPcRel12) {
+              lir->opcode = kThumb2LdrRRI12;
+            } else if (lir->opcode == kThumb2LdrdPcRel8) {
+              lir->opcode = kThumb2LdrdI8;
+            }
+            lir->flags.size = EncodingMap[lir->opcode].size;
+            offset_adjustment += lir->flags.size;
+            // Change the load to be relative to the new Adr base.
+            if (lir->opcode == kThumb2LdrdI8) {
+              lir->operands[3] = 0;
+              lir->operands[2] = base_reg;
+            } else {
+              lir->operands[2] = 0;
+              lir->operands[1] = base_reg;
+            }
+            // Must redo encoding here - won't ever revisit this node.
+            EncodeLIR(lir);
+            prev_lir = new_adr;  // Continue scan with new_adr.
+            lir = new_adr->u.a.pcrel_next;
+            res = kRetryAll;
+            continue;
+          } else {
+            if ((lir->opcode == kThumb2Vldrs) ||
+                (lir->opcode == kThumb2Vldrd) ||
+                (lir->opcode == kThumb2LdrdPcRel8)) {
+              lir->operands[2] = delta >> 2;
+            } else {
+              lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?  delta :
+                  delta >> 2;
+            }
+          }
+          break;
+        }
+        case kFixupCBxZ: {
+          LIR *target_lir = lir->target;
+          uintptr_t pc = lir->offset + 4;
+          uintptr_t target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int delta = target - pc;
+          if (delta > 126 || delta < 0) {
+            /*
+             * Convert to cmp rx,#0 / b[eq/ne] tgt pair
+             * Make new branch instruction and insert after
+             */
+            LIR* new_inst =
+              RawLIR(lir->dalvik_offset, kThumbBCond, 0,
+                     (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe,
+                     0, 0, 0, lir->target);
+            InsertLIRAfter(lir, new_inst);
+
+            /* Convert the cb[n]z to a cmp rx, #0 */
+            // Subtract the old size.
+            offset_adjustment -= lir->flags.size;
+            lir->opcode = kThumbCmpRI8;
+            /* operand[0] is src1 in both cb[n]z & CmpRI8 */
+            lir->operands[1] = 0;
+            lir->target = 0;
+            EncodeLIR(lir);   // NOTE: sets flags.size.
+            // Add back the new size.
+            DCHECK_EQ(lir->flags.size, static_cast<uint32_t>(EncodingMap[lir->opcode].size));
+            offset_adjustment += lir->flags.size;
+            // Set up the new following inst.
+            new_inst->offset = lir->offset + lir->flags.size;
+            new_inst->flags.fixup = kFixupCondBranch;
+            new_inst->flags.size = EncodingMap[new_inst->opcode].size;
+            offset_adjustment += new_inst->flags.size;
+
+            // lir is no longer pc-relative; unlink it and link in new_inst.
+            ReplaceFixup(prev_lir, lir, new_inst);
+            prev_lir = new_inst;  // Continue with the new instruction.
+            lir = new_inst->u.a.pcrel_next;
+            res = kRetryAll;
+            continue;
+          } else {
+            lir->operands[1] = delta >> 1;
+          }
+          break;
+        }
+        case kFixupPushPop: {
+          if (__builtin_popcount(lir->operands[0]) == 1) {
+            /*
+             * The standard push/pop multiple instruction
+             * requires at least two registers in the list.
+             * If we've got just one, switch to the single-reg
+             * encoding.
+             */
+            lir->opcode = (lir->opcode == kThumb2Push) ? kThumb2Push1 :
+                kThumb2Pop1;
+            int reg = 0;
+            while (lir->operands[0]) {
+              if (lir->operands[0] & 0x1) {
+                break;
+              } else {
+                reg++;
+                lir->operands[0] >>= 1;
+              }
+            }
+            lir->operands[0] = reg;
+            // This won't change again; don't bother unlinking, just reset the fixup kind.
+            lir->flags.fixup = kFixupNone;
+          }
+          break;
+        }
+        case kFixupCondBranch: {
+          LIR *target_lir = lir->target;
+          int delta = 0;
+          DCHECK(target_lir);
+          uintptr_t pc = lir->offset + 4;
+          uintptr_t target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          delta = target - pc;
+          if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) {
+            offset_adjustment -= lir->flags.size;
+            lir->opcode = kThumb2BCond;
+            lir->flags.size = EncodingMap[lir->opcode].size;
+            // Fixup kind remains the same.
+            offset_adjustment += lir->flags.size;
+            res = kRetryAll;
+          }
+          lir->operands[0] = delta >> 1;
+          break;
+        }
+        case kFixupT2Branch: {
+          LIR *target_lir = lir->target;
+          uintptr_t pc = lir->offset + 4;
+          uintptr_t target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int delta = target - pc;
+          lir->operands[0] = delta >> 1;
+          if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == 0) {
+            // Useless branch
+            offset_adjustment -= lir->flags.size;
+            lir->flags.is_nop = true;
+            // Don't unlink - just set to do-nothing.
+            lir->flags.fixup = kFixupNone;
+            res = kRetryAll;
+          }
+          break;
+        }
+        case kFixupT1Branch: {
+          LIR *target_lir = lir->target;
+          uintptr_t pc = lir->offset + 4;
+          uintptr_t target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int delta = target - pc;
+          if (delta > 2046 || delta < -2048) {
+            // Convert to Thumb2BCond w/ kArmCondAl
+            offset_adjustment -= lir->flags.size;
+            lir->opcode = kThumb2BUncond;
+            lir->operands[0] = 0;
+            lir->flags.size = EncodingMap[lir->opcode].size;
+            lir->flags.fixup = kFixupT2Branch;
+            offset_adjustment += lir->flags.size;
+            res = kRetryAll;
+          } else {
+            lir->operands[0] = delta >> 1;
+            if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == -1) {
+              // Useless branch
+              offset_adjustment -= lir->flags.size;
+              lir->flags.is_nop = true;
+              // Don't unlink - just set to do-nothing.
+              lir->flags.fixup = kFixupNone;
+              res = kRetryAll;
+            }
+          }
+          break;
+        }
+        case kFixupBlx1: {
+          DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2);
+          /* cur_pc is Thumb */
+          uintptr_t cur_pc = (lir->offset + 4) & ~3;
+          uintptr_t target = lir->operands[1];
+
+          /* Match bit[1] in target with base */
+          if (cur_pc & 0x2) {
+            target |= 0x2;
+          }
+          int delta = target - cur_pc;
+          DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
+
+          lir->operands[0] = (delta >> 12) & 0x7ff;
+          NEXT_LIR(lir)->operands[0] = (delta >> 1) & 0x7ff;
+          break;
+        }
+        case kFixupBl1: {
+          DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2);
+          /* Both cur_pc and target are Thumb */
+          uintptr_t cur_pc = lir->offset + 4;
+          uintptr_t target = lir->operands[1];
+
+          int delta = target - cur_pc;
+          DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
+
+          lir->operands[0] = (delta >> 12) & 0x7ff;
+          NEXT_LIR(lir)->operands[0] = (delta >> 1) & 0x7ff;
+          break;
+        }
+        case kFixupAdr: {
+          SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[2]);
+          LIR* target = lir->target;
+          int target_disp = (tab_rec != NULL) ?  tab_rec->offset + offset_adjustment
+              : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int disp = target_disp - ((lir->offset + 4) & ~3);
+          if (disp < 4096) {
+            lir->operands[1] = disp;
+          } else {
+            // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0]
+            // TUNING: if this case fires often, it can be improved.  Not expected to be common.
+            LIR *new_mov16L =
+                RawLIR(lir->dalvik_offset, kThumb2MovImm16LST,
+                       lir->operands[0], 0, reinterpret_cast<uintptr_t>(lir),
+                       reinterpret_cast<uintptr_t>(tab_rec), 0, lir->target);
+            new_mov16L->flags.size = EncodingMap[new_mov16L->opcode].size;
+            new_mov16L->flags.fixup = kFixupMovImmLST;
+            new_mov16L->offset = lir->offset;
+            // Link the new instruction, retaining lir.
+            InsertLIRBefore(lir, new_mov16L);
+            lir->offset += new_mov16L->flags.size;
+            offset_adjustment += new_mov16L->flags.size;
+            InsertFixupBefore(prev_lir, lir, new_mov16L);
+            prev_lir = new_mov16L;   // Now we've got a new prev.
+
+            LIR *new_mov16H =
+                RawLIR(lir->dalvik_offset, kThumb2MovImm16HST,
+                       lir->operands[0], 0, reinterpret_cast<uintptr_t>(lir),
+                       reinterpret_cast<uintptr_t>(tab_rec), 0, lir->target);
+            new_mov16H->flags.size = EncodingMap[new_mov16H->opcode].size;
+            new_mov16H->flags.fixup = kFixupMovImmHST;
+            new_mov16H->offset = lir->offset;
+            // Link the new instruction, retaining lir.
+            InsertLIRBefore(lir, new_mov16H);
+            lir->offset += new_mov16H->flags.size;
+            offset_adjustment += new_mov16H->flags.size;
+            InsertFixupBefore(prev_lir, lir, new_mov16H);
+            prev_lir = new_mov16H;  // Now we've got a new prev.
+
+            offset_adjustment -= lir->flags.size;
+            if (ARM_LOWREG(lir->operands[0])) {
+              lir->opcode = kThumbAddRRLH;
+            } else {
+              lir->opcode = kThumbAddRRHH;
+            }
+            lir->operands[1] = rARM_PC;
+            lir->flags.size = EncodingMap[lir->opcode].size;
+            offset_adjustment += lir->flags.size;
+            // Must stay in fixup list and have offset updated; will be used by LST/HST pair.
+            lir->flags.fixup = kFixupNone;
+            res = kRetryAll;
+          }
+          break;
+        }
+        case kFixupMovImmLST: {
+          // operands[1] should hold disp, [2] has add, [3] has tab_rec
+          LIR *addPCInst = reinterpret_cast<LIR*>(lir->operands[2]);
+          SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]);
+          // If tab_rec is null, this is a literal load. Use target
+          LIR* target = lir->target;
+          int target_disp = tab_rec ? tab_rec->offset : target->offset;
+          lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff;
+          break;
+        }
+        case kFixupMovImmHST: {
+          // operands[1] should hold disp, [2] has add, [3] has tab_rec
+          LIR *addPCInst = reinterpret_cast<LIR*>(lir->operands[2]);
+          SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]);
+          // If tab_rec is null, this is a literal load. Use target
+          LIR* target = lir->target;
+          int target_disp = tab_rec ? tab_rec->offset : target->offset;
+          lir->operands[1] =
+              ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff;
+          break;
+        }
+        case kFixupAlign4: {
+          int required_size = lir->offset & 0x2;
+          if (lir->flags.size != required_size) {
+            offset_adjustment += required_size - lir->flags.size;
+            lir->flags.size = required_size;
+            res = kRetryAll;
+          }
+          break;
+        }
+        default:
+          LOG(FATAL) << "Unexpected case " << lir->flags.fixup;
+      }
+      /*
+       * If one of the pc-relative instructions expanded, we'll have
+       * to make another pass.  Don't bother to fully assemble the
+       * instruction in that case.
+       */
+      if (res == kSuccess) {
+        EncodeLIR(lir);
+        if (assembler_retries == 0) {
+          // Go ahead and fix up the code buffer image.
+          for (int i = 0; i < lir->flags.size; i++) {
+            code_buffer_[lir->offset + i] = lir->u.a.bytes[i];
+          }
+        }
+      }
+      prev_lir = lir;
+      lir = lir->u.a.pcrel_next;
+    }
+
+    if (res == kSuccess) {
+      break;
+    } else {
+      assembler_retries++;
+      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
+        CodegenDump();
+        LOG(FATAL) << "Assembler error - too many retries";
+      }
+      starting_offset += offset_adjustment;
+      data_offset_ = (starting_offset + 0x3) & ~0x3;
+      AssignDataOffsets();
+    }
+  }
+
+  // Rebuild the CodeBuffer if we had to retry; otherwise it should be good as-is.
+  if (assembler_retries != 0) {
+    code_buffer_.clear();
+    for (LIR* lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) {
+      if (lir->flags.is_nop) {
+        continue;
+      } else {
+        for (int i = 0; i < lir->flags.size; i++) {
+          code_buffer_.push_back(lir->u.a.bytes[i]);
+        }
+      }
+    }
+  }
+
+  data_offset_ = (code_buffer_.size() + 0x3) & ~0x3;
+
+  // Install literals
+  InstallLiteralPools();
+
+  // Install switch tables
+  InstallSwitchTables();
+
+  // Install fill array data
+  InstallFillArrayData();
+
+  // Create the mapping table and native offset to reference map.
+  CreateMappingTables();
+
+  CreateNativeGcMap();
 }
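The generation parity used by AssembleLIR deserves a standalone illustration: within a pass, a target's stored offset is final only if the target has already been visited this pass; otherwise the running offset_adjustment must still be added. A minimal sketch of the idea, with hypothetical Node names rather than ART's LIR type:

    #include <cstdint>

    struct Node {
      uint32_t offset;
      uint8_t generation;  // Parity of the last pass that visited this node.
    };

    // Offset of 'target' as seen while processing pass 'generation'.
    uint32_t EffectiveOffset(const Node& target, uint8_t generation,
                             uint32_t offset_adjustment) {
      return target.offset +
          ((target.generation == generation) ? 0 : offset_adjustment);
    }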
 
 int ArmMir2Lir::GetInsnSize(LIR* lir) {
+  DCHECK(!IsPseudoLirOp(lir->opcode));
   return EncodingMap[lir->opcode].size;
 }
 
+// Encode instruction bit pattern and assign offsets.
+uint32_t ArmMir2Lir::EncodeRange(LIR* head_lir, LIR* tail_lir, uint32_t offset) {
+  LIR* end_lir = tail_lir->next;
+
+  /*
+   * A significant percentage of methods can be assembled in a single pass.  We'll
+   * go ahead and build the code image here, leaving holes for pc-relative fixup
+   * codes.  If the code size changes during that pass, we'll have to throw away
+   * this work - but if not, we're ready to go.
+   */
+  code_buffer_.reserve(estimated_native_code_size_ + 256);  // Add a little slop.
+  LIR* last_fixup = NULL;
+  for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) {
+    lir->offset = offset;
+    if (!lir->flags.is_nop) {
+      if (lir->flags.fixup != kFixupNone) {
+        if (!IsPseudoLirOp(lir->opcode)) {
+          lir->flags.size = EncodingMap[lir->opcode].size;
+          lir->flags.fixup = EncodingMap[lir->opcode].fixup;
+        } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) {
+          lir->flags.size = (offset & 0x2);
+          lir->flags.fixup = kFixupAlign4;
+        } else {
+          lir->flags.size = 0;
+          lir->flags.fixup = kFixupLabel;
+        }
+        // Link into the fixup chain.
+        lir->flags.use_def_invalid = true;
+        lir->u.a.pcrel_next = NULL;
+        if (first_fixup_ == NULL) {
+          first_fixup_ = lir;
+        } else {
+          last_fixup->u.a.pcrel_next = lir;
+        }
+        last_fixup = lir;
+      } else {
+        EncodeLIR(lir);
+      }
+      for (int i = 0; i < lir->flags.size; i++) {
+        code_buffer_.push_back(lir->u.a.bytes[i]);
+      }
+      offset += lir->flags.size;
+    }
+  }
+  return offset;
+}
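The main cost saving in EncodeRange is that only instructions with a fixup kind are threaded onto the pcrel_next side chain, so the retry loop in AssembleLIR walks a short list instead of every LIR. A sketch of building such a chain, using hypothetical field names:

    // Sketch: thread fixup-needing nodes onto a sparse side chain, in order.
    struct Lir {
      int fixup_kind;   // 0 means no fixup needed.
      Lir* next;        // Full instruction list.
      Lir* pcrel_next;  // Sparse fixup chain.
    };

    Lir* BuildFixupChain(Lir* head) {
      Lir* first_fixup = nullptr;
      Lir* last_fixup = nullptr;
      for (Lir* lir = head; lir != nullptr; lir = lir->next) {
        if (lir->fixup_kind == 0) {
          continue;  // Fully encoded up front; never revisited.
        }
        lir->pcrel_next = nullptr;
        if (first_fixup == nullptr) {
          first_fixup = lir;
        } else {
          last_fixup->pcrel_next = lir;
        }
        last_fixup = lir;
      }
      return first_fixup;
    }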
+
+void ArmMir2Lir::AssignDataOffsets() {
+  /* Set up offsets for literals */
+  int offset = data_offset_;
+
+  offset = AssignLiteralOffset(offset);
+
+  offset = AssignSwitchTablesOffset(offset);
+
+  total_size_ = AssignFillArrayDataOffset(offset);
+}
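AssignDataOffsets starts from data_offset_, which its callers compute with the recurring (x + 0x3) & ~0x3 idiom: round an offset up to the next 4-byte boundary before laying out literal data. For example:

    #include <cstdint>

    // Round 'offset' up to the next multiple of 4 (a no-op if already aligned).
    constexpr uint32_t AlignUp4(uint32_t offset) {
      return (offset + 0x3) & ~UINT32_C(0x3);
    }

    static_assert(AlignUp4(0) == 0, "aligned input is unchanged");
    static_assert(AlignUp4(5) == 8, "rounds up");
    static_assert(AlignUp4(8) == 8, "aligned input is unchanged");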
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 2dbe5f5..401da2a 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -120,9 +120,10 @@
 // TODO:  move to common code
 void ArmMir2Lir::GenPrintLabel(MIR* mir) {
   /* Mark the beginning of a Dalvik instruction for line tracking */
-  char* inst_str = cu_->verbose ?
-     mir_graph_->GetDalvikDisassembly(mir) : NULL;
-  MarkBoundary(mir->offset, inst_str);
+  if (cu_->verbose) {
+    char* inst_str = mir_graph_->GetDalvikDisassembly(mir);
+    MarkBoundary(mir->offset, inst_str);
+  }
 }
 
 MIR* ArmMir2Lir::SpecialIGet(BasicBlock** bb, MIR* mir,
@@ -130,7 +131,7 @@
   int field_offset;
   bool is_volatile;
   uint32_t field_idx = mir->dalvikInsn.vC;
-  bool fast_path = FastInstance(field_idx, field_offset, is_volatile, false);
+  bool fast_path = FastInstance(field_idx, false, &field_offset, &is_volatile);
   if (!fast_path || !(mir->optimization_flags & MIR_IGNORE_NULL_CHECK)) {
     return NULL;
   }
@@ -155,7 +156,7 @@
   int field_offset;
   bool is_volatile;
   uint32_t field_idx = mir->dalvikInsn.vC;
-  bool fast_path = FastInstance(field_idx, field_offset, is_volatile, false);
+  bool fast_path = FastInstance(field_idx, false, &field_offset, &is_volatile);
   if (!fast_path || !(mir->optimization_flags & MIR_IGNORE_NULL_CHECK)) {
     return NULL;
   }
@@ -439,88 +440,120 @@
 }
 
 /*
- * Handle simple case (thin lock) inline.  If it's complicated, bail
- * out to the heavyweight lock/unlock routines.  We'll use dedicated
- * registers here in order to be in the right position in case we
- * to bail to oat[Lock/Unlock]Object(self, object)
- *
- * r0 -> self pointer [arg0 for oat[Lock/Unlock]Object
- * r1 -> object [arg1 for oat[Lock/Unlock]Object
- * r2 -> intial contents of object->lock, later result of strex
- * r3 -> self->thread_id
- * r12 -> allow to be used by utilities as general temp
- *
- * The result of the strex is 0 if we acquire the lock.
- *
- * See comments in monitor.cc for the layout of the lock word.
- * Of particular interest to this code is the test for the
- * simple case - which we handle inline.  For monitor enter, the
- * simple case is thin lock, held by no-one.  For monitor exit,
- * the simple case is thin lock, held by the unlocking thread with
- * a recurse count of 0.
- *
- * A minor complication is that there is a field in the lock word
- * unrelated to locking: the hash state.  This field must be ignored, but
- * preserved.
- *
+ * Handle the unlocked -> thin locked transition inline, or else call out to the quick entrypoint.
+ * For more details see monitor.cc.
  */
 void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
   FlushAllRegs();
-  DCHECK_EQ(LW_SHAPE_THIN, 0);
   LoadValueDirectFixed(rl_src, r0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
-  GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
-  LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
-  NewLIR3(kThumb2Ldrex, r1, r0,
-          mirror::Object::MonitorOffset().Int32Value() >> 2);  // Get object->lock
-  // Align owner
-  OpRegImm(kOpLsl, r2, LW_LOCK_OWNER_SHIFT);
-  // Is lock unheld on lock or held by us (==thread_id) on unlock?
-  NewLIR4(kThumb2Bfi, r2, r1, 0, LW_LOCK_OWNER_SHIFT - 1);
-  NewLIR3(kThumb2Bfc, r1, LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1);
-  OpRegImm(kOpCmp, r1, 0);
-  OpIT(kCondEq, "");
-  NewLIR4(kThumb2Strex, r1, r2, r0,
-          mirror::Object::MonitorOffset().Int32Value() >> 2);
-  OpRegImm(kOpCmp, r1, 0);
-  OpIT(kCondNe, "T");
-  // Go expensive route - artLockObjectFromCode(self, obj);
-  LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR);
-  ClobberCalleeSave();
-  LIR* call_inst = OpReg(kOpBlx, rARM_LR);
-  MarkSafepointPC(call_inst);
-  GenMemBarrier(kLoadLoad);
+  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
+  if (kArchVariantHasGoodBranchPredictor) {
+    LIR* null_check_branch;
+    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+      null_check_branch = nullptr;  // No null check.
+    } else {
+      // If the null-check fails, it's handled by the slow path to reduce exception-related meta-data.
+      null_check_branch = OpCmpImmBranch(kCondEq, r0, 0, NULL);
+    }
+    LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
+    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, r1, 0, NULL);
+    NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    LIR* lock_success_branch = OpCmpImmBranch(kCondEq, r1, 0, NULL);
+
+    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+    not_unlocked_branch->target = slow_path_target;
+    if (null_check_branch != nullptr) {
+      null_check_branch->target = slow_path_target;
+    }
+    // TODO: move to a slow path.
+    // Go expensive route - artLockObjectFromCode(obj);
+    LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR);
+    ClobberCalleeSave();
+    LIR* call_inst = OpReg(kOpBlx, rARM_LR);
+    MarkSafepointPC(call_inst);
+
+    LIR* success_target = NewLIR0(kPseudoTargetLabel);
+    lock_success_branch->target = success_target;
+    GenMemBarrier(kLoadLoad);
+  } else {
+    // Explicit null-check as slow-path is entered using an IT.
+    GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
+    LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
+    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    OpRegImm(kOpCmp, r1, 0);
+    OpIT(kCondEq, "");
+    NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    OpRegImm(kOpCmp, r1, 0);
+    OpIT(kCondNe, "T");
+    // Go expensive route - artLockObjectFromCode(self, obj);
+    LoadWordDisp/*ne*/(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR);
+    ClobberCalleeSave();
+    LIR* call_inst = OpReg(kOpBlx/*ne*/, rARM_LR);
+    MarkSafepointPC(call_inst);
+    GenMemBarrier(kLoadLoad);
+  }
 }
 
 /*
- * For monitor unlock, we don't have to use ldrex/strex.  Once
- * we've determined that the lock is thin and that we own it with
- * a zero recursion count, it's safe to punch it back to the
- * initial, unlock thin state with a store word.
+ * Handle the thin locked -> unlocked transition inline, or else call out to the quick entrypoint.
+ * For more details see monitor.cc. Note the code below doesn't use ldrex/strex because the thread
+ * holds the lock and can only give away ownership if it's suspended.
  */
 void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
-  DCHECK_EQ(LW_SHAPE_THIN, 0);
   FlushAllRegs();
   LoadValueDirectFixed(rl_src, r0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
-  GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
-  LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1);  // Get lock
+  LIR* null_check_branch;
   LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
-  // Is lock unheld on lock or held by us (==thread_id) on unlock?
-  OpRegRegImm(kOpAnd, r3, r1,
-              (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
-  // Align owner
-  OpRegImm(kOpLsl, r2, LW_LOCK_OWNER_SHIFT);
-  NewLIR3(kThumb2Bfc, r1, LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1);
-  OpRegReg(kOpSub, r1, r2);
-  OpIT(kCondEq, "EE");
-  StoreWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r3);
-  // Go expensive route - UnlockObjectFromCode(obj);
-  LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR);
-  ClobberCalleeSave();
-  LIR* call_inst = OpReg(kOpBlx, rARM_LR);
-  MarkSafepointPC(call_inst);
-  GenMemBarrier(kStoreLoad);
+  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
+  if (kArchVariantHasGoodBranchPredictor) {
+    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+      null_check_branch = nullptr;  // No null check.
+    } else {
+      // If the null-check fails, it's handled by the slow path to reduce exception-related meta-data.
+      null_check_branch = OpCmpImmBranch(kCondEq, r0, 0, NULL);
+    }
+    LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1);
+    LoadConstantNoClobber(r3, 0);
+    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, r1, r2, NULL);
+    StoreWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r3);
+    LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
+
+    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+    slow_unlock_branch->target = slow_path_target;
+    if (null_check_branch != nullptr) {
+      null_check_branch->target = slow_path_target;
+    }
+    // TODO: move to a slow path.
+    // Go expensive route - artUnlockObjectFromCode(obj);
+    LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR);
+    ClobberCalleeSave();
+    LIR* call_inst = OpReg(kOpBlx, rARM_LR);
+    MarkSafepointPC(call_inst);
+
+    LIR* success_target = NewLIR0(kPseudoTargetLabel);
+    unlock_success_branch->target = success_target;
+    GenMemBarrier(kStoreLoad);
+  } else {
+    // Explicit null-check as slow-path is entered using an IT.
+    GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
+    LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1);  // Get lock
+    LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
+    LoadConstantNoClobber(r3, 0);
+    // Is lock unheld on lock or held by us (==thread_id) on unlock?
+    OpRegReg(kOpCmp, r1, r2);
+    OpIT(kCondEq, "EE");
+    StoreWordDisp/*eq*/(r0, mirror::Object::MonitorOffset().Int32Value(), r3);
+    // Go expensive route - UnlockObjectFromCode(obj);
+    LoadWordDisp/*ne*/(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR);
+    ClobberCalleeSave();
+    LIR* call_inst = OpReg(kOpBlx/*ne*/, rARM_LR);
+    MarkSafepointPC(call_inst);
+    GenMemBarrier(kStoreLoad);
+  }
 }
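Both monitor fast paths above hinge on one atomic attempt: observe an unlocked word, publish this thread's id via ldrex/strex, and fall back to the runtime on any complication. As a rough C++ analogue (a plain CAS stand-in for illustration, not ART's lock-word layout):

    #include <atomic>
    #include <cstdint>

    // Illustrative thin lock: the word is 0 when unlocked, the owner's thread
    // id when thin-locked.  On failure the caller takes the slow path into the
    // runtime, just as the generated code branches to the pLockObject entrypoint.
    bool TryThinLock(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
      uint32_t expected = 0;  // Only an unlocked word is taken over inline.
      return lock_word.compare_exchange_strong(expected, thread_id,
                                               std::memory_order_acquire);
    }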
 
 void ArmMir2Lir::GenMoveException(RegLocation rl_dest) {
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 291319f..aa5782b 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -51,7 +51,6 @@
     int AllocTypedTempPair(bool fp_hint, int reg_class);
     int S2d(int low_reg, int high_reg);
     int TargetReg(SpecialTargetRegister reg);
-    RegisterInfo* GetRegInfo(int reg);
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
@@ -71,9 +70,14 @@
     void CompilerInitializeRegAlloc();
 
     // Required for target - miscellaneous.
+    void AssembleLIR();
+    uint32_t EncodeRange(LIR* head_lir, LIR* tail_lir, uint32_t starting_offset);
+    int AssignInsnOffsets();
+    void AssignOffsets();
     AssemblerStatus AssembleInstructions(uintptr_t start_addr);
+    void EncodeLIR(LIR* lir);
     void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
-    void SetupTargetResourceMasks(LIR* lir);
+    void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
     const char* GetTargetInstFmt(int opcode);
     const char* GetTargetInstName(int opcode);
     std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
@@ -85,12 +89,10 @@
     // Required for target - Dalvik-level generators.
     void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2);
-    void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index,
-                        RegLocation rl_src, int scale);
     void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                      RegLocation rl_index, RegLocation rl_dest, int scale);
-    void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                     RegLocation rl_index, RegLocation rl_src, int scale);
+    void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
+                     RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_shift);
     void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
@@ -188,6 +190,9 @@
     MIR* SpecialIdentity(MIR* mir);
     LIR* LoadFPConstantValue(int r_dest, int value);
     bool BadOverlap(RegLocation rl_src, RegLocation rl_dest);
+    void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
+    void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
+    void AssignDataOffsets();
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 6fbdd2f..75d3738 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -24,8 +24,7 @@
 
 namespace art {
 
-LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, int src1,
-         int src2, LIR* target) {
+LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target) {
   OpRegReg(kOpCmp, src1, src2);
   return OpCondBranch(cond, target);
 }
@@ -234,11 +233,17 @@
     rl_false = LoadValue(rl_false, kCoreReg);
     rl_result = EvalLoc(rl_dest, kCoreReg, true);
     OpRegImm(kOpCmp, rl_src.low_reg, 0);
-    OpIT(kCondEq, "E");
-    LIR* l1 = OpRegCopy(rl_result.low_reg, rl_true.low_reg);
-    l1->flags.is_nop = false;  // Make sure this instruction isn't optimized away
-    LIR* l2 = OpRegCopy(rl_result.low_reg, rl_false.low_reg);
-    l2->flags.is_nop = false;  // Make sure this instruction isn't optimized away
+    if (rl_result.low_reg == rl_true.low_reg) {  // Is the "true" case already in place?
+      OpIT(kCondNe, "");
+      OpRegCopy(rl_result.low_reg, rl_false.low_reg);
+    } else if (rl_result.low_reg == rl_false.low_reg) {  // False case in place?
+      OpIT(kCondEq, "");
+      OpRegCopy(rl_result.low_reg, rl_true.low_reg);
+    } else {  // Normal - select between the two.
+      OpIT(kCondEq, "E");
+      OpRegCopy(rl_result.low_reg, rl_true.low_reg);
+      OpRegCopy(rl_result.low_reg, rl_false.low_reg);
+    }
     GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
   }
   StoreValue(rl_dest, rl_result);
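The reworked select above earns its keep by checking whether either input already occupies the result register, shrinking the IT shadow to a single conditional move. The same shape in a sketch, with hypothetical emit helpers standing in for OpIT/OpRegCopy:

    enum Cond { kCondEq, kCondNe };

    // Hypothetical stand-ins for the backend's OpIT/OpRegCopy emitters.
    inline void EmitIt(Cond /*cond*/, const char* /*shadow*/) {}
    inline void EmitMove(int /*dest*/, int /*src*/) {}

    void LowerSelect(int result, int true_reg, int false_reg) {
      if (result == true_reg) {          // "True" value already in place.
        EmitIt(kCondNe, "");
        EmitMove(result, false_reg);
      } else if (result == false_reg) {  // "False" value already in place.
        EmitIt(kCondEq, "");
        EmitMove(result, true_reg);
      } else {                           // General case: then/else moves.
        EmitIt(kCondEq, "E");
        EmitMove(result, true_reg);
        EmitMove(result, false_reg);
      }
    }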
@@ -313,7 +318,18 @@
   LIR* branch;
   int mod_imm;
   ArmConditionCode arm_cond = ArmConditionEncoding(cond);
-  if ((ARM_LOWREG(reg)) && (check_value == 0) &&
+  /*
+   * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
+   * compare-and-branch if zero is ideal if it will reach.  However, because null checks
+   * branch forward to a launch pad, they will frequently not reach - and thus have to
+   * be converted to a long form during assembly (which will trigger another assembly
+   * pass).  Here we estimate the branch distance for such checks, and if it is large we
+   * directly generate the long form in an attempt to avoid an extra assembly pass.
+   * TODO: consider interspersing launchpads in code following unconditional branches.
+   */
+  bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget));
+  skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64);
+  if (!skip && (ARM_LOWREG(reg)) && (check_value == 0) &&
      ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) {
     branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
                      reg, 0);
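The heuristic above skips the 16-bit compare-and-branch when the throw target is estimated to be more than 64 code units away, since cbz/cbnz has a very short reach. The reach test itself, as a sketch:

    #include <cstdint>

    // A Thumb cbz/cbnz can only branch forward, 0..126 bytes from the pc
    // (instruction address + 4).  Report whether the short form reaches.
    bool CbzReaches(uint32_t pc, uint32_t target) {
      int delta = static_cast<int>(target - pc);
      return delta >= 0 && delta <= 126;
    }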
@@ -618,7 +634,7 @@
       break;
   }
   LIR* dmb = NewLIR1(kThumb2Dmb, dmb_flavor);
-  dmb->def_mask = ENCODE_ALL;
+  dmb->u.m.def_mask = ENCODE_ALL;
 #endif
 }
 
@@ -755,7 +771,7 @@
  * Generate array load
  */
 void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_dest, int scale) {
+                             RegLocation rl_index, RegLocation rl_dest, int scale) {
   RegisterClass reg_class = oat_reg_class_by_size(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -845,13 +861,13 @@
  *
  */
 void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_src, int scale) {
+                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
   RegisterClass reg_class = oat_reg_class_by_size(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
-  int data_offset;
   bool constant_index = rl_index.is_const;
 
-  if (rl_src.wide) {
+  int data_offset;
+  if (size == kLong || size == kDouble) {
     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   } else {
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
@@ -868,12 +884,14 @@
   }
 
   int reg_ptr;
+  bool allocated_reg_ptr_temp = false;
   if (constant_index) {
     reg_ptr = rl_array.low_reg;
   } else if (IsTemp(rl_array.low_reg)) {
     Clobber(rl_array.low_reg);
     reg_ptr = rl_array.low_reg;
   } else {
+    allocated_reg_ptr_temp = true;
     reg_ptr = AllocTemp();
   }
 
@@ -924,71 +942,15 @@
     StoreBaseIndexed(reg_ptr, rl_index.low_reg, rl_src.low_reg,
                      scale, size);
   }
-  if (!constant_index) {
+  if (allocated_reg_ptr_temp) {
     FreeTemp(reg_ptr);
   }
-}
-
-/*
- * Generate array store
- *
- */
-void ArmMir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale) {
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value();
-
-  FlushAllRegs();  // Use explicit registers
-  LockCallTemps();
-
-  int r_value = TargetReg(kArg0);  // Register holding value
-  int r_array_class = TargetReg(kArg1);  // Register holding array's Class
-  int r_array = TargetReg(kArg2);  // Register holding array
-  int r_index = TargetReg(kArg3);  // Register holding index into array
-
-  LoadValueDirectFixed(rl_array, r_array);  // Grab array
-  LoadValueDirectFixed(rl_src, r_value);  // Grab value
-  LoadValueDirectFixed(rl_index, r_index);  // Grab index
-
-  GenNullCheck(rl_array.s_reg_low, r_array, opt_flags);  // NPE?
-
-  // Store of null?
-  LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL);
-
-  // Get the array's class.
-  LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class);
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCanPutArrayElement), r_value,
-                          r_array_class, true);
-  // Redo LoadValues in case they didn't survive the call.
-  LoadValueDirectFixed(rl_array, r_array);  // Reload array
-  LoadValueDirectFixed(rl_index, r_index);  // Reload index
-  LoadValueDirectFixed(rl_src, r_value);  // Reload value
-  r_array_class = INVALID_REG;
-
-  // Branch here if value to be stored == null
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  null_value_check->target = target;
-
-  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
-  int reg_len = INVALID_REG;
-  if (needs_range_check) {
-    reg_len = TargetReg(kArg1);
-    LoadWordDisp(r_array, len_offset, reg_len);  // Get len
-  }
-  /* r_ptr -> array data */
-  int r_ptr = AllocTemp();
-  OpRegRegImm(kOpAdd, r_ptr, r_array, data_offset);
-  if (needs_range_check) {
-    GenRegRegCheck(kCondCs, r_index, reg_len, kThrowArrayBounds);
-  }
-  StoreBaseIndexed(r_ptr, r_index, r_value, scale, kWord);
-  FreeTemp(r_ptr);
-  FreeTemp(r_index);
-  if (!mir_graph_->IsConstantNullRef(rl_src)) {
-    MarkGCCard(r_value, r_array);
+  if (card_mark) {
+    MarkGCCard(rl_src.low_reg, rl_array.low_reg);
   }
 }
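The card_mark parameter folds the reference-array write barrier into the common store path: after a reference store, the card covering the holding object is dirtied so the GC rescans it. A hedged sketch of that barrier (the shift and dirty value are illustrative constants, not necessarily ART's):

    #include <cstdint>

    // Illustrative write barrier: dirty the card covering 'holder_address'
    // after a reference field or element in that object has been updated.
    void MarkCardSketch(uint8_t* card_table, uintptr_t holder_address) {
      const unsigned kCardShift = 10;   // 1 KiB cards (illustrative).
      const uint8_t kCardDirty = 0x70;  // Arbitrary dirty marker for the sketch.
      card_table[holder_address >> kCardShift] = kCardDirty;
    }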
 
+
 void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
                                    RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
   rl_src = LoadValueWide(rl_src, kCoreReg);
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 6cc3052..933c1a3 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -118,78 +118,83 @@
   return ENCODE_ARM_REG_PC;
 }
 
-void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir) {
+// Thumb2 specific setup.  TODO: inline?
+void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
   DCHECK_EQ(cu_->instruction_set, kThumb2);
+  DCHECK(!lir->flags.use_def_invalid);
 
-  // Thumb2 specific setup
-  uint64_t flags = ArmMir2Lir::EncodingMap[lir->opcode].flags;
   int opcode = lir->opcode;
 
-  if (flags & REG_DEF_SP) {
-    lir->def_mask |= ENCODE_ARM_REG_SP;
-  }
-
-  if (flags & REG_USE_SP) {
-    lir->use_mask |= ENCODE_ARM_REG_SP;
-  }
-
-  if (flags & REG_DEF_LIST0) {
-    lir->def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
-  }
-
-  if (flags & REG_DEF_LIST1) {
-    lir->def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
-  }
-
-  if (flags & REG_DEF_FPCS_LIST0) {
-    lir->def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
-  }
-
-  if (flags & REG_DEF_FPCS_LIST2) {
-    for (int i = 0; i < lir->operands[2]; i++) {
-      SetupRegMask(&lir->def_mask, lir->operands[1] + i);
+  // These flags are somewhat uncommon - bypass if we can.
+  if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 | REG_DEF_LIST1 |
+                REG_DEF_FPCS_LIST0 | REG_DEF_FPCS_LIST2 | REG_USE_PC | IS_IT | REG_USE_LIST0 |
+                REG_USE_LIST1 | REG_USE_FPCS_LIST0 | REG_USE_FPCS_LIST2 | REG_DEF_LR)) != 0) {
+    if (flags & REG_DEF_SP) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_SP;
     }
-  }
 
-  if (flags & REG_USE_PC) {
-    lir->use_mask |= ENCODE_ARM_REG_PC;
-  }
-
-  /* Conservatively treat the IT block */
-  if (flags & IS_IT) {
-    lir->def_mask = ENCODE_ALL;
-  }
-
-  if (flags & REG_USE_LIST0) {
-    lir->use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
-  }
-
-  if (flags & REG_USE_LIST1) {
-    lir->use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
-  }
-
-  if (flags & REG_USE_FPCS_LIST0) {
-    lir->use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
-  }
-
-  if (flags & REG_USE_FPCS_LIST2) {
-    for (int i = 0; i < lir->operands[2]; i++) {
-      SetupRegMask(&lir->use_mask, lir->operands[1] + i);
+    if (flags & REG_USE_SP) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_SP;
     }
-  }
-  /* Fixup for kThumbPush/lr and kThumbPop/pc */
-  if (opcode == kThumbPush || opcode == kThumbPop) {
-    uint64_t r8Mask = GetRegMaskCommon(r8);
-    if ((opcode == kThumbPush) && (lir->use_mask & r8Mask)) {
-      lir->use_mask &= ~r8Mask;
-      lir->use_mask |= ENCODE_ARM_REG_LR;
-    } else if ((opcode == kThumbPop) && (lir->def_mask & r8Mask)) {
-      lir->def_mask &= ~r8Mask;
-      lir->def_mask |= ENCODE_ARM_REG_PC;
+
+    if (flags & REG_DEF_LIST0) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
     }
-  }
-  if (flags & REG_DEF_LR) {
-    lir->def_mask |= ENCODE_ARM_REG_LR;
+
+    if (flags & REG_DEF_LIST1) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
+    }
+
+    if (flags & REG_DEF_FPCS_LIST0) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_DEF_FPCS_LIST2) {
+      for (int i = 0; i < lir->operands[2]; i++) {
+        SetupRegMask(&lir->u.m.def_mask, lir->operands[1] + i);
+      }
+    }
+
+    if (flags & REG_USE_PC) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_PC;
+    }
+
+    /* Conservatively treat the IT block */
+    if (flags & IS_IT) {
+      lir->u.m.def_mask = ENCODE_ALL;
+    }
+
+    if (flags & REG_USE_LIST0) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_USE_LIST1) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
+    }
+
+    if (flags & REG_USE_FPCS_LIST0) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_USE_FPCS_LIST2) {
+      for (int i = 0; i < lir->operands[2]; i++) {
+        SetupRegMask(&lir->u.m.use_mask, lir->operands[1] + i);
+      }
+    }
+    /* Fixup for kThumbPush/lr and kThumbPop/pc */
+    if (opcode == kThumbPush || opcode == kThumbPop) {
+      uint64_t r8Mask = GetRegMaskCommon(r8);
+      if ((opcode == kThumbPush) && (lir->u.m.use_mask & r8Mask)) {
+        lir->u.m.use_mask &= ~r8Mask;
+        lir->u.m.use_mask |= ENCODE_ARM_REG_LR;
+      } else if ((opcode == kThumbPop) && (lir->u.m.def_mask & r8Mask)) {
+        lir->u.m.def_mask &= ~r8Mask;
+        lir->u.m.def_mask |= ENCODE_ARM_REG_PC;
+      }
+    }
+    if (flags & REG_DEF_LR) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_LR;
+    }
   }
 }
 
@@ -466,8 +471,8 @@
 
     /* Memory bits */
     if (arm_lir && (mask & ENCODE_DALVIK_REG)) {
-      sprintf(buf + strlen(buf), "dr%d%s", arm_lir->alias_info & 0xffff,
-              (arm_lir->alias_info & 0x80000000) ? "(+1)" : "");
+      sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info),
+              DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : "");
     }
     if (mask & ENCODE_LITERAL) {
       strcat(buf, "lit ");
@@ -691,11 +696,6 @@
   return res;
 }
 
-ArmMir2Lir::RegisterInfo* ArmMir2Lir::GetRegInfo(int reg) {
-  return ARM_FPREG(reg) ? &reg_pool_->FPRegs[reg & ARM_FP_REG_MASK]
-      : &reg_pool_->core_regs[reg];
-}
-
 /* To be used when explicitly managing register use */
 void ArmMir2Lir::LockCallTemps() {
   LockTemp(r0);
@@ -718,14 +718,17 @@
 }
 
 uint64_t ArmMir2Lir::GetTargetInstFlags(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
   return ArmMir2Lir::EncodingMap[opcode].flags;
 }
 
 const char* ArmMir2Lir::GetTargetInstName(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
   return ArmMir2Lir::EncodingMap[opcode].name;
 }
 
 const char* ArmMir2Lir::GetTargetInstFmt(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
   return ArmMir2Lir::EncodingMap[opcode].fmt;
 }
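The rewrite of SetupTargetResourceMasks above folds all of the rarely-set flags into one combined test, so the common case pays a single branch before the cascade of per-flag checks. Reduced to its essentials (the flag names and bit assignments below are illustrative, not the real encoding):

    #include <cstdint>

    enum : uint64_t {  // illustrative bit assignments
      kFlagA = UINT64_C(1) << 0,
      kFlagB = UINT64_C(1) << 1,
      kFlagC = UINT64_C(1) << 2,
    };

    void HandleRareFlags(uint64_t flags) {
      // One OR-ed mask test bypasses the whole cascade when no rare flag is set.
      if ((flags & (kFlagA | kFlagB | kFlagC)) != 0) {
        if (flags & kFlagA) { /* handle A */ }
        if (flags & kFlagB) { /* handle B */ }
        if (flags & kFlagC) { /* handle C */ }
      }
    }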
 
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index c63de69..00de8de 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -90,7 +90,6 @@
   LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs,
                           r_dest, r15pc, 0, 0, 0, data_target);
   SetMemRefType(load_pc_rel, true, kLiteral);
-  load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target);
   AppendLIR(load_pc_rel);
   return load_pc_rel;
 }
@@ -328,7 +327,7 @@
       LOG(FATAL) << "Bad opcode: " << op;
       break;
   }
-  DCHECK_GE(static_cast<int>(opcode), 0);
+  DCHECK(!IsPseudoLirOp(opcode));
   if (EncodingMap[opcode].flags & IS_BINARY_OP) {
     return NewLIR2(opcode, r_dest_src1, r_src2);
   } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
@@ -406,7 +405,7 @@
       LOG(FATAL) << "Bad opcode: " << op;
       break;
   }
-  DCHECK_GE(static_cast<int>(opcode), 0);
+  DCHECK(!IsPseudoLirOp(opcode));
   if (EncodingMap[opcode].flags & IS_QUAD_OP) {
     return NewLIR4(opcode, r_dest, r_src1, r_src2, shift);
   } else {
@@ -626,7 +625,6 @@
                    r_dest_lo, r_dest_hi, r15pc, 0, 0, data_target);
     }
     SetMemRefType(res, true, kLiteral);
-    res->alias_info = reinterpret_cast<uintptr_t>(data_target);
     AppendLIR(res);
   }
   return res;
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index a49fa7b..617f357 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -45,29 +45,54 @@
 }
 
 void Mir2Lir::MarkSafepointPC(LIR* inst) {
-  inst->def_mask = ENCODE_ALL;
+  DCHECK(!inst->flags.use_def_invalid);
+  inst->u.m.def_mask = ENCODE_ALL;
   LIR* safepoint_pc = NewLIR0(kPseudoSafepointPC);
-  DCHECK_EQ(safepoint_pc->def_mask, ENCODE_ALL);
+  DCHECK_EQ(safepoint_pc->u.m.def_mask, ENCODE_ALL);
 }
 
-bool Mir2Lir::FastInstance(uint32_t field_idx, int& field_offset, bool& is_volatile, bool is_put) {
+bool Mir2Lir::FastInstance(uint32_t field_idx, bool is_put, int* field_offset, bool* is_volatile) {
   return cu_->compiler_driver->ComputeInstanceFieldInfo(
-      field_idx, mir_graph_->GetCurrentDexCompilationUnit(), field_offset, is_volatile, is_put);
+      field_idx, mir_graph_->GetCurrentDexCompilationUnit(), is_put, field_offset, is_volatile);
+}
+
+/* Remove a LIR from the list. */
+void Mir2Lir::UnlinkLIR(LIR* lir) {
+  if (UNLIKELY(lir == first_lir_insn_)) {
+    first_lir_insn_ = lir->next;
+    if (lir->next != NULL) {
+      lir->next->prev = NULL;
+    } else {
+      DCHECK(lir->next == NULL);
+      DCHECK(lir == last_lir_insn_);
+      last_lir_insn_ = NULL;
+    }
+  } else if (lir == last_lir_insn_) {
+    last_lir_insn_ = lir->prev;
+    lir->prev->next = NULL;
+  } else if ((lir->prev != NULL) && (lir->next != NULL)) {
+    lir->prev->next = lir->next;
+    lir->next->prev = lir->prev;
+  }
 }
 
 /* Convert an instruction to a NOP */
 void Mir2Lir::NopLIR(LIR* lir) {
   lir->flags.is_nop = true;
+  if (!cu_->verbose) {
+    UnlinkLIR(lir);
+  }
 }
 
 void Mir2Lir::SetMemRefType(LIR* lir, bool is_load, int mem_type) {
   uint64_t *mask_ptr;
   uint64_t mask = ENCODE_MEM;
   DCHECK(GetTargetInstFlags(lir->opcode) & (IS_LOAD | IS_STORE));
+  DCHECK(!lir->flags.use_def_invalid);
   if (is_load) {
-    mask_ptr = &lir->use_mask;
+    mask_ptr = &lir->u.m.use_mask;
   } else {
-    mask_ptr = &lir->def_mask;
+    mask_ptr = &lir->u.m.def_mask;
   }
   /* Clear out the memref flags */
   *mask_ptr &= ~mask;
@@ -104,7 +129,7 @@
    * Store the Dalvik register id in alias_info. Mark the MSB if it is a 64-bit
    * access.
    */
-  lir->alias_info = ENCODE_ALIAS_INFO(reg_id, is64bit);
+  lir->flags.alias_info = ENCODE_ALIAS_INFO(reg_id, is64bit);
 }
 
 /*
@@ -190,11 +215,11 @@
       break;
   }
 
-  if (lir->use_mask && (!lir->flags.is_nop || dump_nop)) {
-    DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->use_mask, "use"));
+  if (lir->u.m.use_mask && (!lir->flags.is_nop || dump_nop)) {
+    DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.use_mask, "use"));
   }
-  if (lir->def_mask && (!lir->flags.is_nop || dump_nop)) {
-    DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->def_mask, "def"));
+  if (lir->u.m.def_mask && (!lir->flags.is_nop || dump_nop)) {
+    DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.def_mask, "def"));
   }
 }
 
@@ -225,12 +250,12 @@
 }
 
 /* Dump a mapping table */
-void Mir2Lir::DumpMappingTable(const char* table_name, const std::string& descriptor,
-                               const std::string& name, const std::string& signature,
+void Mir2Lir::DumpMappingTable(const char* table_name, const char* descriptor,
+                               const char* name, const Signature& signature,
                                const std::vector<uint32_t>& v) {
   if (v.size() > 0) {
     std::string line(StringPrintf("\n  %s %s%s_%s_table[%zu] = {", table_name,
-                     descriptor.c_str(), name.c_str(), signature.c_str(), v.size()));
+                     descriptor, name, signature.ToString().c_str(), v.size()));
     std::replace(line.begin(), line.end(), ';', '_');
     LOG(INFO) << line;
     for (uint32_t i = 0; i < v.size(); i+=2) {
@@ -270,9 +295,9 @@
 
   const DexFile::MethodId& method_id =
       cu_->dex_file->GetMethodId(cu_->method_idx);
-  std::string signature(cu_->dex_file->GetMethodSignature(method_id));
-  std::string name(cu_->dex_file->GetMethodName(method_id));
-  std::string descriptor(cu_->dex_file->GetMethodDeclaringClassDescriptor(method_id));
+  const Signature signature = cu_->dex_file->GetMethodSignature(method_id);
+  const char* name = cu_->dex_file->GetMethodName(method_id);
+  const char* descriptor = cu_->dex_file->GetMethodDeclaringClassDescriptor(method_id);
 
   // Dump mapping tables
   DumpMappingTable("PC2Dex_MappingTable", descriptor, name, signature, pc2dex_mapping_table_);
@@ -325,6 +350,7 @@
     new_value->operands[0] = value;
     new_value->next = *constant_list_p;
     *constant_list_p = new_value;
+    estimated_native_code_size_ += sizeof(value);
     return new_value;
   }
   return NULL;
@@ -408,6 +434,7 @@
     int bx_offset = INVALID_OFFSET;
     switch (cu_->instruction_set) {
       case kThumb2:
+        DCHECK(tab_rec->anchor->flags.fixup != kFixupNone);
         bx_offset = tab_rec->anchor->offset + 4;
         break;
       case kX86:
@@ -691,116 +718,29 @@
   return offset;
 }
 
-// LIR offset assignment.
-int Mir2Lir::AssignInsnOffsets() {
-  LIR* lir;
-  int offset = 0;
-
-  for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) {
-    lir->offset = offset;
-    if (lir->opcode >= 0) {
-      if (!lir->flags.is_nop) {
-        offset += lir->flags.size;
-      }
-    } else if (lir->opcode == kPseudoPseudoAlign4) {
-      if (offset & 0x2) {
-        offset += 2;
-        lir->operands[0] = 1;
-      } else {
-        lir->operands[0] = 0;
-      }
-    }
-    /* Pseudo opcodes don't consume space */
-  }
-
-  return offset;
-}
-
-/*
- * Walk the compilation unit and assign offsets to instructions
- * and literals and compute the total size of the compiled unit.
- */
-void Mir2Lir::AssignOffsets() {
-  int offset = AssignInsnOffsets();
-
-  /* Const values have to be word aligned */
-  offset = (offset + 3) & ~3;
-
-  /* Set up offsets for literals */
-  data_offset_ = offset;
-
-  offset = AssignLiteralOffset(offset);
-
-  offset = AssignSwitchTablesOffset(offset);
-
-  offset = AssignFillArrayDataOffset(offset);
-
-  total_size_ = offset;
-}
-
-/*
- * Go over each instruction in the list and calculate the offset from the top
- * before sending them off to the assembler. If out-of-range branch distance is
- * seen rearrange the instructions a bit to correct it.
- */
-void Mir2Lir::AssembleLIR() {
-  AssignOffsets();
-  int assembler_retries = 0;
-  /*
-   * Assemble here.  Note that we generate code with optimistic assumptions
-   * and if found now to work, we'll have to redo the sequence and retry.
-   */
-
-  while (true) {
-    AssemblerStatus res = AssembleInstructions(0);
-    if (res == kSuccess) {
-      break;
-    } else {
-      assembler_retries++;
-      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
-        CodegenDump();
-        LOG(FATAL) << "Assembler error - too many retries";
-      }
-      // Redo offsets and try again
-      AssignOffsets();
-      code_buffer_.clear();
-    }
-  }
-
-  // Install literals
-  InstallLiteralPools();
-
-  // Install switch tables
-  InstallSwitchTables();
-
-  // Install fill array data
-  InstallFillArrayData();
-
-  // Create the mapping table and native offset to reference map.
-  CreateMappingTables();
-
-  CreateNativeGcMap();
-}
-
 /*
  * Insert a kPseudoCaseLabel at the beginning of the Dalvik
- * offset vaddr.  This label will be used to fix up the case
- * branch table during the assembly phase.  Be sure to set
- * all resource flags on this to prevent code motion across
- * target boundaries.  KeyVal is just there for debugging.
+ * offset vaddr if pretty-printing, otherwise use the standard block
+ * label.  The selected label will be used to fix up the case
+ * branch table during the assembly phase.  All resource flags
+ * are set to prevent code motion.  KeyVal is just there for debugging.
  */
 LIR* Mir2Lir::InsertCaseLabel(int vaddr, int keyVal) {
-  SafeMap<unsigned int, LIR*>::iterator it;
-  it = boundary_map_.find(vaddr);
-  if (it == boundary_map_.end()) {
-    LOG(FATAL) << "Error: didn't find vaddr 0x" << std::hex << vaddr;
+  LIR* boundary_lir = &block_label_list_[mir_graph_->FindBlock(vaddr)->id];
+  LIR* res = boundary_lir;
+  if (cu_->verbose) {
+    // Only pay the expense if we're pretty-printing.
+    LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR));
+    new_label->dalvik_offset = vaddr;
+    new_label->opcode = kPseudoCaseLabel;
+    new_label->operands[0] = keyVal;
+    new_label->flags.fixup = kFixupLabel;
+    DCHECK(!new_label->flags.use_def_invalid);
+    new_label->u.m.def_mask = ENCODE_ALL;
+    InsertLIRAfter(boundary_lir, new_label);
+    res = new_label;
   }
-  LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR));
-  new_label->dalvik_offset = vaddr;
-  new_label->opcode = kPseudoCaseLabel;
-  new_label->operands[0] = keyVal;
-  InsertLIRAfter(it->second, new_label);
-  return new_label;
+  return res;
 }
 
 void Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable *tab_rec) {
@@ -883,18 +823,9 @@
   }
 }
 
-/*
- * Set up special LIR to mark a Dalvik byte-code instruction start and
- * record it in the boundary_map.  NOTE: in cases such as kMirOpCheck in
- * which we split a single Dalvik instruction, only the first MIR op
- * associated with a Dalvik PC should be entered into the map.
- */
-LIR* Mir2Lir::MarkBoundary(int offset, const char* inst_str) {
-  LIR* res = NewLIR1(kPseudoDalvikByteCodeBoundary, reinterpret_cast<uintptr_t>(inst_str));
-  if (boundary_map_.find(offset) == boundary_map_.end()) {
-    boundary_map_.Put(offset, res);
-  }
-  return res;
+/* Set up special LIR to mark a Dalvik byte-code instruction start for pretty printing */
+void Mir2Lir::MarkBoundary(int offset, const char* inst_str) {
+  NewLIR1(kPseudoDalvikByteCodeBoundary, reinterpret_cast<uintptr_t>(inst_str));
 }
 
 bool Mir2Lir::EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) {
@@ -942,6 +873,7 @@
       literal_list_(NULL),
       method_literal_list_(NULL),
       code_literal_list_(NULL),
+      first_fixup_(NULL),
       cu_(cu),
       mir_graph_(mir_graph),
       switch_tables_(arena, 4, kGrowableArraySwitchTables),
@@ -949,10 +881,13 @@
       throw_launchpads_(arena, 2048, kGrowableArrayThrowLaunchPads),
       suspend_launchpads_(arena, 4, kGrowableArraySuspendLaunchPads),
       intrinsic_launchpads_(arena, 2048, kGrowableArrayMisc),
+      tempreg_info_(arena, 20, kGrowableArrayMisc),
+      reginfo_map_(arena, 64, kGrowableArrayMisc),
       data_offset_(0),
       total_size_(0),
       block_label_list_(NULL),
       current_dalvik_offset_(0),
+      estimated_native_code_size_(0),
       reg_pool_(NULL),
       live_sreg_(0),
       num_core_spills_(0),
@@ -1091,5 +1026,4 @@
   new_lir->next->prev = new_lir;
 }
 
-
 }  // namespace art
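UnlinkLIR above is standard doubly-linked-list removal with head and tail bookkeeping; NopLIR then uses it so nop'd instructions disappear from the list entirely unless verbose dumping needs them kept. The same three cases on a generic node type (Node and Unlink are stand-ins, not ART names):

    struct Node {
      Node* prev = nullptr;
      Node* next = nullptr;
    };

    // Remove 'n' from the list delimited by *head/*tail, mirroring the three
    // cases UnlinkLIR distinguishes: head node, tail node, interior node.
    void Unlink(Node** head, Node** tail, Node* n) {
      if (n == *head) {
        *head = n->next;
        if (n->next != nullptr) {
          n->next->prev = nullptr;
        } else {
          *tail = nullptr;  // 'n' was the only element
        }
      } else if (n == *tail) {
        *tail = n->prev;
        n->prev->next = nullptr;
      } else if (n->prev != nullptr && n->next != nullptr) {
        n->prev->next = n->next;
        n->next->prev = n->prev;
      }
    }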
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index f018c61..2670c23 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -30,13 +30,14 @@
  */
 
 /*
- * Generate an kPseudoBarrier marker to indicate the boundary of special
+ * Generate a kPseudoBarrier marker to indicate the boundary of special
  * blocks.
  */
 void Mir2Lir::GenBarrier() {
   LIR* barrier = NewLIR0(kPseudoBarrier);
   /* Mark all resources as being clobbered */
-  barrier->def_mask = -1;
+  DCHECK(!barrier->flags.use_def_invalid);
+  barrier->u.m.def_mask = ENCODE_ALL;
 }
 
 // FIXME: need to do some work to split out targets with
@@ -65,8 +66,7 @@
 
 /* Perform null-check on a register.  */
 LIR* Mir2Lir::GenNullCheck(int s_reg, int m_reg, int opt_flags) {
-  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) &&
-    opt_flags & MIR_IGNORE_NULL_CHECK) {
+  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
     return NULL;
   }
   return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer);
@@ -127,13 +127,11 @@
         InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src2))) {
       // OK - convert this to a compare immediate and branch
       OpCmpImmBranch(cond, rl_src1.low_reg, mir_graph_->ConstantValue(rl_src2), taken);
-      OpUnconditionalBranch(fall_through);
       return;
     }
   }
   rl_src2 = LoadValue(rl_src2, kCoreReg);
   OpCmpBranch(cond, rl_src1.low_reg, rl_src2.low_reg, taken);
-  OpUnconditionalBranch(fall_through);
 }
 
 void Mir2Lir::GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, LIR* taken,
@@ -164,7 +162,6 @@
       LOG(FATAL) << "Unexpected opcode " << opcode;
   }
   OpCmpImmBranch(cond, rl_src.low_reg, 0, taken);
-  OpUnconditionalBranch(fall_through);
 }
 
 void Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
@@ -337,8 +334,8 @@
   bool is_volatile;
   bool is_referrers_class;
   bool fast_path = cu_->compiler_driver->ComputeStaticFieldInfo(
-      field_idx, mir_graph_->GetCurrentDexCompilationUnit(), field_offset, ssb_index,
-      is_referrers_class, is_volatile, true);
+      field_idx, mir_graph_->GetCurrentDexCompilationUnit(), true,
+      &field_offset, &ssb_index, &is_referrers_class, &is_volatile);
   if (fast_path && !SLOW_FIELD_PATH) {
     DCHECK_GE(field_offset, 0);
     int rBase;
@@ -423,8 +420,8 @@
   bool is_volatile;
   bool is_referrers_class;
   bool fast_path = cu_->compiler_driver->ComputeStaticFieldInfo(
-      field_idx, mir_graph_->GetCurrentDexCompilationUnit(), field_offset, ssb_index,
-      is_referrers_class, is_volatile, false);
+      field_idx, mir_graph_->GetCurrentDexCompilationUnit(), false,
+      &field_offset, &ssb_index, &is_referrers_class, &is_volatile);
   if (fast_path && !SLOW_FIELD_PATH) {
     DCHECK_GE(field_offset, 0);
     int rBase;
@@ -626,7 +623,7 @@
   int field_offset;
   bool is_volatile;
 
-  bool fast_path = FastInstance(field_idx, field_offset, is_volatile, false);
+  bool fast_path = FastInstance(field_idx, false, &field_offset, &is_volatile);
 
   if (fast_path && !SLOW_FIELD_PATH) {
     RegLocation rl_result;
@@ -687,8 +684,7 @@
   int field_offset;
   bool is_volatile;
 
-  bool fast_path = FastInstance(field_idx, field_offset, is_volatile,
-                 true);
+  bool fast_path = FastInstance(field_idx, true, &field_offset, &is_volatile);
   if (fast_path && !SLOW_FIELD_PATH) {
     RegisterClass reg_class = oat_reg_class_by_size(size);
     DCHECK_GE(field_offset, 0);
@@ -730,6 +726,18 @@
   }
 }
 
+void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index,
+                             RegLocation rl_src) {
+  bool needs_range_check = !(opt_flags & MIR_IGNORE_RANGE_CHECK);
+  bool needs_null_check = !((cu_->disable_opt & (1 << kNullCheckElimination)) &&
+      (opt_flags & MIR_IGNORE_NULL_CHECK));
+  ThreadOffset helper = needs_range_check
+      ? (needs_null_check ? QUICK_ENTRYPOINT_OFFSET(pAputObjectWithNullAndBoundCheck)
+                          : QUICK_ENTRYPOINT_OFFSET(pAputObjectWithBoundCheck))
+      : QUICK_ENTRYPOINT_OFFSET(pAputObject);
+  CallRuntimeHelperRegLocationRegLocationRegLocation(helper, rl_array, rl_index, rl_src, true);
+}
+
 void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) {
   RegLocation rl_method = LoadCurrMethod();
   int res_reg = AllocTemp();
@@ -1113,8 +1121,8 @@
   if (!type_known_abstract) {
     branch2 = OpCmpBranch(kCondEq, TargetReg(kArg1), class_reg, NULL);
   }
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCheckCast), TargetReg(kArg1),
-                          TargetReg(kArg2), true);
+  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCheckCast), TargetReg(kArg2),
+                          TargetReg(kArg1), true);
   /* branch target here */
   LIR* target = NewLIR0(kPseudoTargetLabel);
   branch1->target = target;
@@ -1651,7 +1659,7 @@
     case Instruction::REM_LONG_2ADDR:
       call_out = true;
       check_zero = true;
-      func_offset = QUICK_ENTRYPOINT_OFFSET(pLdivmod);
+      func_offset = QUICK_ENTRYPOINT_OFFSET(pLmod);
       /* NOTE - for Arm, result is in kArg2/kArg3 instead of kRet0/kRet1 */
       ret_reg = (cu_->instruction_set == kThumb2) ? TargetReg(kArg2) : TargetReg(kRet0);
       break;
@@ -1765,4 +1773,16 @@
   suspend_launchpads_.Insert(launch_pad);
 }
 
+/* Call out to helper assembly routine that will null check obj and then lock it. */
+void Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
+  FlushAllRegs();
+  CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(pLockObject), rl_src, true);
+}
+
+/* Call out to helper assembly routine that will null check obj and then unlock it. */
+void Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
+  FlushAllRegs();
+  CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(pUnlockObject), rl_src, true);
+}
+
 }  // namespace art
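The new GenArrayObjPut picks one of three aput-object entrypoints based on which checks the optimizer could not prove away, so the runtime helper only performs the checks still needed. The selection logic in isolation (the enum and function names are hypothetical stand-ins for the entrypoint offsets):

    enum class AputHelper {  // stand-ins for the three runtime entrypoints
      kWithNullAndBoundCheck,
      kWithBoundCheck,
      kPlain,
    };

    AputHelper SelectAputHelper(bool needs_range_check, bool needs_null_check) {
      // Mirrors the ternary in the patch: a needed range check selects one of
      // the checking helpers; otherwise the plain helper is used.
      if (needs_range_check) {
        return needs_null_check ? AputHelper::kWithNullAndBoundCheck
                                : AputHelper::kWithBoundCheck;
      }
      return AputHelper::kPlain;
    }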
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 2a0a23c..0a0cc17 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -214,6 +214,7 @@
                                                          int arg0, RegLocation arg1,
                                                          RegLocation arg2, bool safepoint_pc) {
   int r_tgt = CallHelperSetup(helper_offset);
+  DCHECK_EQ(arg1.wide, 0U);
   LoadValueDirectFixed(arg1, TargetReg(kArg1));
   if (arg2.wide == 0) {
     LoadValueDirectFixed(arg2, TargetReg(kArg2));
@@ -225,6 +226,21 @@
   CallHelper(r_tgt, helper_offset, safepoint_pc);
 }
 
+void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset helper_offset,
+                                                                 RegLocation arg0, RegLocation arg1,
+                                                                 RegLocation arg2,
+                                                                 bool safepoint_pc) {
+  int r_tgt = CallHelperSetup(helper_offset);
+  DCHECK_EQ(arg0.wide, 0U);
+  LoadValueDirectFixed(arg0, TargetReg(kArg0));
+  DCHECK_EQ(arg1.wide, 0U);
+  LoadValueDirectFixed(arg1, TargetReg(kArg1));
+  DCHECK_EQ(arg2.wide, 0U);
+  LoadValueDirectFixed(arg2, TargetReg(kArg2));
+  ClobberCalleeSave();
+  CallHelper(r_tgt, helper_offset, safepoint_pc);
+}
+
 /*
  * If there are any ins passed in registers that have not been promoted
  * to a callee-save register, flush them to the frame.  Perform initial
@@ -810,7 +826,7 @@
       OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset);
       LIR* ld = OpVldm(TargetReg(kArg3), regs_left);
       // TUNING: loosen barrier
-      ld->def_mask = ENCODE_ALL;
+      ld->u.m.def_mask = ENCODE_ALL;
       SetMemRefType(ld, true /* is_load */, kDalvikReg);
       call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                                direct_code, direct_method, type);
@@ -819,7 +835,7 @@
                                direct_code, direct_method, type);
       LIR* st = OpVstm(TargetReg(kArg3), regs_left);
       SetMemRefType(st, false /* is_load */, kDalvikReg);
-      st->def_mask = ENCODE_ALL;
+      st->u.m.def_mask = ENCODE_ALL;
       call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                                direct_code, direct_method, type);
     }
@@ -1219,8 +1235,10 @@
    * method.  By doing this during basic block construction, we can also
    * take advantage of/generate new useful dataflow info.
    */
+  const DexFile::MethodId& target_mid = cu_->dex_file->GetMethodId(info->index);
+  const DexFile::TypeId& declaring_type = cu_->dex_file->GetTypeId(target_mid.class_idx_);
   StringPiece tgt_methods_declaring_class(
-      cu_->dex_file->GetMethodDeclaringClassDescriptor(cu_->dex_file->GetMethodId(info->index)));
+      cu_->dex_file->StringDataAsStringPieceByIdx(declaring_type.descriptor_idx_));
   if (tgt_methods_declaring_class.starts_with("Ljava/lang/Double;")) {
     std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
     if (tgt_method == "long java.lang.Double.doubleToRawLongBits(double)") {
@@ -1373,10 +1391,10 @@
   bool fast_path =
       cu_->compiler_driver->ComputeInvokeInfo(mir_graph_->GetCurrentDexCompilationUnit(),
                                               current_dalvik_offset_,
-                                              info->type, target_method,
-                                              vtable_idx,
-                                              direct_code, direct_method,
-                                              true) && !SLOW_INVOKE_PATH;
+                                              true, true,
+                                              &info->type, &target_method,
+                                              &vtable_idx,
+                                              &direct_code, &direct_method) && !SLOW_INVOKE_PATH;
   if (info->type == kInterface) {
     if (fast_path) {
       p_null_ck = &null_ck;
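Each of the helper-call wrappers above asserts that the RegLocations it marshals are narrow, since a wide (64-bit) value occupies a register pair and needs a different load path. The invariant in miniature (RegLoc and LoadNarrowArg are stand-ins, not ART declarations):

    #include <cassert>

    struct RegLoc { unsigned wide : 1; };  // stand-in for RegLocation

    void LoadNarrowArg(const RegLoc& arg, int target_reg) {
      // A wide value would need a register pair, so single-register marshaling
      // asserts narrowness first (the DCHECK_EQ(...wide, 0U) calls above).
      assert(arg.wide == 0);
      (void)target_reg;  // real code would load 'arg' into 'target_reg' here
    }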
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
index 630e990..0f29578 100644
--- a/compiler/dex/quick/local_optimizations.cc
+++ b/compiler/dex/quick/local_optimizations.cc
@@ -21,8 +21,8 @@
 #define DEBUG_OPT(X)
 
 /* Check RAW, WAR, and WAW dependency on the register operands */
-#define CHECK_REG_DEP(use, def, check) ((def & check->use_mask) || \
-                                        ((use | def) & check->def_mask))
+#define CHECK_REG_DEP(use, def, check) ((def & check->u.m.use_mask) || \
+                                        ((use | def) & check->u.m.def_mask))
 
 /* Scheduler heuristics */
 #define MAX_HOIST_DISTANCE 20
@@ -30,10 +30,10 @@
 #define LD_LATENCY 2
 
 static bool IsDalvikRegisterClobbered(LIR* lir1, LIR* lir2) {
-  int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->alias_info);
-  int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->alias_info);
-  int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->alias_info);
-  int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->alias_info);
+  int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->flags.alias_info);
+  int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->flags.alias_info);
+  int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->flags.alias_info);
+  int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->flags.alias_info);
 
   return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo);
 }
@@ -78,7 +78,7 @@
   }
 
   for (this_lir = PREV_LIR(tail_lir); this_lir != head_lir; this_lir = PREV_LIR(this_lir)) {
-    if (is_pseudo_opcode(this_lir->opcode)) {
+    if (IsPseudoLirOp(this_lir->opcode)) {
       continue;
     }
 
@@ -99,15 +99,14 @@
     int native_reg_id;
     if (cu_->instruction_set == kX86) {
       // If x86, location differs depending on whether memory/reg operation.
-      native_reg_id = (GetTargetInstFlags(this_lir->opcode) & IS_STORE) ? this_lir->operands[2]
-          : this_lir->operands[0];
+      native_reg_id = (target_flags & IS_STORE) ? this_lir->operands[2] : this_lir->operands[0];
     } else {
       native_reg_id = this_lir->operands[0];
     }
-    bool is_this_lir_load = GetTargetInstFlags(this_lir->opcode) & IS_LOAD;
+    bool is_this_lir_load = target_flags & IS_LOAD;
     LIR* check_lir;
     /* Use the mem mask to determine the rough memory location */
-    uint64_t this_mem_mask = (this_lir->use_mask | this_lir->def_mask) & ENCODE_MEM;
+    uint64_t this_mem_mask = (this_lir->u.m.use_mask | this_lir->u.m.def_mask) & ENCODE_MEM;
 
     /*
      * Currently only eliminate redundant ld/st for constant and Dalvik
@@ -117,10 +116,10 @@
       continue;
     }
 
-    uint64_t stop_def_reg_mask = this_lir->def_mask & ~ENCODE_MEM;
+    uint64_t stop_def_reg_mask = this_lir->u.m.def_mask & ~ENCODE_MEM;
     uint64_t stop_use_reg_mask;
     if (cu_->instruction_set == kX86) {
-      stop_use_reg_mask = (IS_BRANCH | this_lir->use_mask) & ~ENCODE_MEM;
+      stop_use_reg_mask = (IS_BRANCH | this_lir->u.m.use_mask) & ~ENCODE_MEM;
     } else {
       /*
        * Add pc to the resource mask to prevent this instruction
@@ -128,7 +127,7 @@
        * region bits since stop_mask is used to check data/control
        * dependencies.
        */
-        stop_use_reg_mask = (GetPCUseDefEncoding() | this_lir->use_mask) & ~ENCODE_MEM;
+        stop_use_reg_mask = (GetPCUseDefEncoding() | this_lir->u.m.use_mask) & ~ENCODE_MEM;
     }
 
     for (check_lir = NEXT_LIR(this_lir); check_lir != tail_lir; check_lir = NEXT_LIR(check_lir)) {
@@ -136,11 +135,11 @@
        * Skip already dead instructions (whose dataflow information is
        * outdated and misleading).
        */
-      if (check_lir->flags.is_nop || is_pseudo_opcode(check_lir->opcode)) {
+      if (check_lir->flags.is_nop || IsPseudoLirOp(check_lir->opcode)) {
         continue;
       }
 
-      uint64_t check_mem_mask = (check_lir->use_mask | check_lir->def_mask) & ENCODE_MEM;
+      uint64_t check_mem_mask = (check_lir->u.m.use_mask | check_lir->u.m.def_mask) & ENCODE_MEM;
       uint64_t alias_condition = this_mem_mask & check_mem_mask;
       bool stop_here = false;
 
@@ -160,7 +159,7 @@
            */
           DCHECK(!(check_flags & IS_STORE));
           /* Same value && same register type */
-          if (check_lir->alias_info == this_lir->alias_info &&
+          if (check_lir->flags.alias_info == this_lir->flags.alias_info &&
               SameRegType(check_lir->operands[0], native_reg_id)) {
             /*
              * Different destination register - insert
@@ -169,11 +168,11 @@
             if (check_lir->operands[0] != native_reg_id) {
               ConvertMemOpIntoMove(check_lir, check_lir->operands[0], native_reg_id);
             }
-            check_lir->flags.is_nop = true;
+            NopLIR(check_lir);
           }
         } else if (alias_condition == ENCODE_DALVIK_REG) {
           /* Must alias */
-          if (check_lir->alias_info == this_lir->alias_info) {
+          if (check_lir->flags.alias_info == this_lir->flags.alias_info) {
             /* Only optimize compatible registers */
             bool reg_compatible = SameRegType(check_lir->operands[0], native_reg_id);
             if ((is_this_lir_load && is_check_lir_load) ||
@@ -188,7 +187,7 @@
                   native_reg_id) {
                   ConvertMemOpIntoMove(check_lir, check_lir->operands[0], native_reg_id);
                 }
-                check_lir->flags.is_nop = true;
+                NopLIR(check_lir);
               } else {
                 /*
                 * Destinations are of different types -
@@ -202,7 +201,7 @@
               stop_here = true;
             } else if (!is_this_lir_load && !is_check_lir_load) {
               /* WAW - nuke the earlier store */
-              this_lir->flags.is_nop = true;
+              NopLIR(this_lir);
               stop_here = true;
             }
           /* Partial overlap */
@@ -257,7 +256,7 @@
            * top-down order.
            */
           InsertLIRBefore(check_lir, new_store_lir);
-          this_lir->flags.is_nop = true;
+          NopLIR(this_lir);
         }
         break;
       } else if (!check_lir->flags.is_nop) {
@@ -286,7 +285,7 @@
 
   /* Start from the second instruction */
   for (this_lir = NEXT_LIR(head_lir); this_lir != tail_lir; this_lir = NEXT_LIR(this_lir)) {
-    if (is_pseudo_opcode(this_lir->opcode)) {
+    if (IsPseudoLirOp(this_lir->opcode)) {
       continue;
     }
 
@@ -298,7 +297,7 @@
       continue;
     }
 
-    uint64_t stop_use_all_mask = this_lir->use_mask;
+    uint64_t stop_use_all_mask = this_lir->u.m.use_mask;
 
     if (cu_->instruction_set != kX86) {
       /*
@@ -314,7 +313,7 @@
 
     /* Similar as above, but just check for pure register dependency */
     uint64_t stop_use_reg_mask = stop_use_all_mask & ~ENCODE_MEM;
-    uint64_t stop_def_reg_mask = this_lir->def_mask & ~ENCODE_MEM;
+    uint64_t stop_def_reg_mask = this_lir->u.m.def_mask & ~ENCODE_MEM;
 
     int next_slot = 0;
     bool stop_here = false;
@@ -329,7 +328,7 @@
         continue;
       }
 
-      uint64_t check_mem_mask = check_lir->def_mask & ENCODE_MEM;
+      uint64_t check_mem_mask = check_lir->u.m.def_mask & ENCODE_MEM;
       uint64_t alias_condition = stop_use_all_mask & check_mem_mask;
       stop_here = false;
 
@@ -338,7 +337,7 @@
         /* We can fully disambiguate Dalvik references */
         if (alias_condition == ENCODE_DALVIK_REG) {
          /* Must alias or partially overlap */
-          if ((check_lir->alias_info == this_lir->alias_info) ||
+          if ((check_lir->flags.alias_info == this_lir->flags.alias_info) ||
             IsDalvikRegisterClobbered(this_lir, check_lir)) {
             stop_here = true;
           }
@@ -363,7 +362,7 @@
       * Store the dependent or non-pseudo/independent instruction to the
        * list.
        */
-      if (stop_here || !is_pseudo_opcode(check_lir->opcode)) {
+      if (stop_here || !IsPseudoLirOp(check_lir->opcode)) {
         prev_inst_list[next_slot++] = check_lir;
         if (next_slot == MAX_HOIST_DISTANCE) {
           break;
@@ -394,7 +393,7 @@
       int slot;
       LIR* dep_lir = prev_inst_list[next_slot-1];
       /* If there is ld-ld dependency, wait LDLD_DISTANCE cycles */
-      if (!is_pseudo_opcode(dep_lir->opcode) &&
+      if (!IsPseudoLirOp(dep_lir->opcode) &&
         (GetTargetInstFlags(dep_lir->opcode) & IS_LOAD)) {
         first_slot -= LDLD_DISTANCE;
       }
@@ -407,7 +406,7 @@
         LIR* prev_lir = prev_inst_list[slot+1];
 
         /* Check the highest instruction */
-        if (prev_lir->def_mask == ENCODE_ALL) {
+        if (prev_lir->u.m.def_mask == ENCODE_ALL) {
           /*
            * If the first instruction is a load, don't hoist anything
            * above it since it is unlikely to be beneficial.
@@ -435,9 +434,9 @@
          * Try to find two instructions with load/use dependency until
          * the remaining instructions are less than LD_LATENCY.
          */
-        bool prev_is_load = is_pseudo_opcode(prev_lir->opcode) ? false :
+        bool prev_is_load = IsPseudoLirOp(prev_lir->opcode) ? false :
             (GetTargetInstFlags(prev_lir->opcode) & IS_LOAD);
-        if (((cur_lir->use_mask & prev_lir->def_mask) && prev_is_load) || (slot < LD_LATENCY)) {
+        if (((cur_lir->u.m.use_mask & prev_lir->u.m.def_mask) && prev_is_load) || (slot < LD_LATENCY)) {
           break;
         }
       }
@@ -453,7 +452,7 @@
          * is never the first LIR on the list
          */
         InsertLIRBefore(cur_lir, new_load_lir);
-        this_lir->flags.is_nop = true;
+        NopLIR(this_lir);
       }
     }
   }
@@ -468,41 +467,4 @@
   }
 }
 
-/*
- * Nop any unconditional branches that go to the next instruction.
- * Note: new redundant branches may be inserted later, and we'll
- * use a check in final instruction assembly to nop those out.
- */
-void Mir2Lir::RemoveRedundantBranches() {
-  LIR* this_lir;
-
-  for (this_lir = first_lir_insn_; this_lir != last_lir_insn_; this_lir = NEXT_LIR(this_lir)) {
-    /* Branch to the next instruction */
-    if (IsUnconditionalBranch(this_lir)) {
-      LIR* next_lir = this_lir;
-
-      while (true) {
-        next_lir = NEXT_LIR(next_lir);
-
-        /*
-         * Is the branch target the next instruction?
-         */
-        if (next_lir == this_lir->target) {
-          this_lir->flags.is_nop = true;
-          break;
-        }
-
-        /*
-         * Found real useful stuff between the branch and the target.
-         * Need to explicitly check the last_lir_insn_ here because it
-         * might be the last real instruction.
-         */
-        if (!is_pseudo_opcode(next_lir->opcode) ||
-          (next_lir == last_lir_insn_))
-          break;
-      }
-    }
-  }
-}
-
 }  // namespace art
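CHECK_REG_DEP above is the classic hazard test over use/def bitmasks: a later instruction depends on an earlier one if the earlier def overlaps its uses (RAW), or if either earlier mask overlaps its defs (WAR/WAW). As a plain function over 64-bit resource masks:

    #include <cstdint>

    // True if the instruction with masks (later_use, later_def) depends on an
    // earlier instruction with masks (use, def).
    bool HasRegDependency(uint64_t use, uint64_t def,
                          uint64_t later_use, uint64_t later_def) {
      return (def & later_use) != 0 ||        // RAW: earlier def, later use
             ((use | def) & later_def) != 0;  // WAR/WAW: later def collides
    }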
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index cd25232..6bfccfd 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -503,7 +503,7 @@
   if (!unconditional) {
     InsertLIRBefore(lir, hop_target);
   }
-  lir->flags.is_nop = true;
+  NopLIR(lir);
 }
 
 /*
@@ -526,7 +526,7 @@
       continue;
     }
 
-    if (lir->flags.pcRelFixup) {
+    if (lir->flags.fixup != kFixupNone) {
       if (lir->opcode == kMipsDelta) {
         /*
          * The "Delta" pseudo-ops load the difference between
@@ -561,7 +561,7 @@
               RawLIR(lir->dalvik_offset, kMipsAddu,
                      lir->operands[0], lir->operands[0], r_RA);
           InsertLIRBefore(lir, new_addu);
-          lir->flags.is_nop = true;
+          NopLIR(lir);
           res = kRetryAll;
         }
       } else if (lir->opcode == kMipsDeltaLo) {
@@ -646,6 +646,7 @@
     if (res != kSuccess) {
       continue;
     }
+    DCHECK(!IsPseudoLirOp(lir->opcode));
     const MipsEncodingMap *encoder = &EncodingMap[lir->opcode];
     uint32_t bits = encoder->skeleton;
     int i;
@@ -695,6 +696,7 @@
     code_buffer_.push_back((bits >> 24) & 0xff);
     // TUNING: replace with proper delay slot handling
     if (encoder->size == 8) {
+      DCHECK(!IsPseudoLirOp(lir->opcode));
       const MipsEncodingMap *encoder = &EncodingMap[kMipsNop];
       uint32_t bits = encoder->skeleton;
       code_buffer_.push_back(bits & 0xff);
@@ -707,7 +709,101 @@
 }
 
 int MipsMir2Lir::GetInsnSize(LIR* lir) {
+  DCHECK(!IsPseudoLirOp(lir->opcode));
   return EncodingMap[lir->opcode].size;
 }
 
+// LIR offset assignment.
+// TODO: consolidate w/ Arm assembly mechanism.
+int MipsMir2Lir::AssignInsnOffsets() {
+  LIR* lir;
+  int offset = 0;
+
+  for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) {
+    lir->offset = offset;
+    if (LIKELY(lir->opcode >= 0)) {
+      if (!lir->flags.is_nop) {
+        offset += lir->flags.size;
+      }
+    } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) {
+      if (offset & 0x2) {
+        offset += 2;
+        lir->operands[0] = 1;
+      } else {
+        lir->operands[0] = 0;
+      }
+    }
+    /* Pseudo opcodes don't consume space */
+  }
+  return offset;
+}
+
+/*
+ * Walk the compilation unit and assign offsets to instructions
+ * and literals and compute the total size of the compiled unit.
+ * TODO: consolidate w/ Arm assembly mechanism.
+ */
+void MipsMir2Lir::AssignOffsets() {
+  int offset = AssignInsnOffsets();
+
+  /* Const values have to be word aligned */
+  offset = (offset + 3) & ~3;
+
+  /* Set up offsets for literals */
+  data_offset_ = offset;
+
+  offset = AssignLiteralOffset(offset);
+
+  offset = AssignSwitchTablesOffset(offset);
+
+  offset = AssignFillArrayDataOffset(offset);
+
+  total_size_ = offset;
+}
+
+/*
+ * Go over each instruction in the list and calculate the offset from the top
+ * before sending them off to the assembler. If an out-of-range branch distance
+ * is seen, rearrange the instructions a bit to correct it.
+ * TODO: consolidate w/ Arm assembly mechanism.
+ */
+void MipsMir2Lir::AssembleLIR() {
+  AssignOffsets();
+  int assembler_retries = 0;
+  /*
+   * Assemble here.  Note that we generate code with optimistic assumptions
+   * and if found not to work, we'll have to redo the sequence and retry.
+   */
+
+  while (true) {
+    AssemblerStatus res = AssembleInstructions(0);
+    if (res == kSuccess) {
+      break;
+    } else {
+      assembler_retries++;
+      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
+        CodegenDump();
+        LOG(FATAL) << "Assembler error - too many retries";
+      }
+      // Redo offsets and try again
+      AssignOffsets();
+      code_buffer_.clear();
+    }
+  }
+
+  // Install literals
+  InstallLiteralPools();
+
+  // Install switch tables
+  InstallSwitchTables();
+
+  // Install fill array data
+  InstallFillArrayData();
+
+  // Create the mapping table and native offset to reference map.
+  CreateMappingTables();
+
+  CreateNativeGcMap();
+}
+
 }  // namespace art
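AssembleLIR above is a fixpoint loop: assemble under optimistic size assumptions, and whenever a fixup turns out to be out of range, reassign offsets and retry up to a bound. The control skeleton, with stand-in passes (AssembleOnce, AssignAllOffsets, and the widening counter are illustrative, not the real interfaces):

    #include <stdexcept>

    enum class Status { kSuccess, kRetryAll };

    static int g_long_branches = 2;  // pretend two branches need widening
    Status AssembleOnce() {
      return g_long_branches-- > 0 ? Status::kRetryAll : Status::kSuccess;
    }
    void AssignAllOffsets() {}

    void AssembleWithRetries(int max_retries) {
      AssignAllOffsets();
      for (int attempt = 0; ; ++attempt) {
        if (AssembleOnce() == Status::kSuccess) {
          break;  // fixpoint: all fixups resolved in range
        }
        if (attempt >= max_retries) {
          throw std::runtime_error("assembler error - too many retries");
        }
        AssignAllOffsets();  // a branch was widened; recompute offsets, retry
      }
    }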
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index d53c012..9a5ca2c 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -261,36 +261,6 @@
   MarkSafepointPC(call_inst);
 }
 
-/*
- * TODO: implement fast path to short-circuit thin-lock case
- */
-void MipsMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
-  FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rMIPS_ARG0);  // Get obj
-  LockCallTemps();  // Prepare for explicit register usage
-  GenNullCheck(rl_src.s_reg_low, rMIPS_ARG0, opt_flags);
-  // Go expensive route - artLockObjectFromCode(self, obj);
-  int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pLockObject));
-  ClobberCalleeSave();
-  LIR* call_inst = OpReg(kOpBlx, r_tgt);
-  MarkSafepointPC(call_inst);
-}
-
-/*
- * TODO: implement fast path to short-circuit thin-lock case
- */
-void MipsMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
-  FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rMIPS_ARG0);  // Get obj
-  LockCallTemps();  // Prepare for explicit register usage
-  GenNullCheck(rl_src.s_reg_low, rMIPS_ARG0, opt_flags);
-  // Go expensive route - UnlockObjectFromCode(obj);
-  int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pUnlockObject));
-  ClobberCalleeSave();
-  LIR* call_inst = OpReg(kOpBlx, r_tgt);
-  MarkSafepointPC(call_inst);
-}
-
 void MipsMir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset().Int32Value();
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -318,6 +288,7 @@
   FreeTemp(reg_card_base);
   FreeTemp(reg_card_no);
 }
+
 void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
   int spill_count = num_core_spills_ + num_fp_spills_;
   /*
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index b9cb720..387fef3 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -52,7 +52,6 @@
     int AllocTypedTempPair(bool fp_hint, int reg_class);
     int S2d(int low_reg, int high_reg);
     int TargetReg(SpecialTargetRegister reg);
-    RegisterInfo* GetRegInfo(int reg);
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
@@ -72,9 +71,12 @@
     void CompilerInitializeRegAlloc();
 
     // Required for target - miscellaneous.
+    void AssembleLIR();
+    int AssignInsnOffsets();
+    void AssignOffsets();
     AssemblerStatus AssembleInstructions(uintptr_t start_addr);
     void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
-    void SetupTargetResourceMasks(LIR* lir);
+    void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
     const char* GetTargetInstFmt(int opcode);
     const char* GetTargetInstName(int opcode);
     std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
@@ -86,12 +88,10 @@
     // Required for target - Dalvik-level generators.
     void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_src2);
-    void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index,
-                                RegLocation rl_src, int scale);
     void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_dest, int scale);
+                     RegLocation rl_index, RegLocation rl_dest, int scale);
     void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale);
+                     RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_shift);
     void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
@@ -124,8 +124,6 @@
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
     void GenMemBarrier(MemBarrierKind barrier_kind);
-    void GenMonitorEnter(int opt_flags, RegLocation rl_src);
-    void GenMonitorExit(int opt_flags, RegLocation rl_src);
     void GenMoveException(RegLocation rl_dest);
     void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
                                                int first_bit, int second_bit);
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 6ce5750..7fefd33 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -484,7 +484,7 @@
  *
  */
 void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_src, int scale) {
+                          RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
   RegisterClass reg_class = oat_reg_class_by_size(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -498,12 +498,14 @@
   rl_array = LoadValue(rl_array, kCoreReg);
   rl_index = LoadValue(rl_index, kCoreReg);
   int reg_ptr = INVALID_REG;
+  bool allocated_reg_ptr_temp = false;
   if (IsTemp(rl_array.low_reg)) {
     Clobber(rl_array.low_reg);
     reg_ptr = rl_array.low_reg;
   } else {
     reg_ptr = AllocTemp();
     OpRegCopy(reg_ptr, rl_array.low_reg);
+    allocated_reg_ptr_temp = true;
   }
 
   /* null object? */
@@ -538,8 +540,6 @@
     }
 
     StoreBaseDispWide(reg_ptr, 0, rl_src.low_reg, rl_src.high_reg);
-
-    FreeTemp(reg_ptr);
   } else {
     rl_src = LoadValue(rl_src, reg_class);
     if (needs_range_check) {
@@ -549,65 +549,11 @@
     StoreBaseIndexed(reg_ptr, rl_index.low_reg, rl_src.low_reg,
                      scale, size);
   }
-}
-
-/*
- * Generate array store
- *
- */
-void MipsMir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale) {
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value();
-
-  FlushAllRegs();  // Use explicit registers
-  LockCallTemps();
-
-  int r_value = TargetReg(kArg0);  // Register holding value
-  int r_array_class = TargetReg(kArg1);  // Register holding array's Class
-  int r_array = TargetReg(kArg2);  // Register holding array
-  int r_index = TargetReg(kArg3);  // Register holding index into array
-
-  LoadValueDirectFixed(rl_array, r_array);  // Grab array
-  LoadValueDirectFixed(rl_src, r_value);  // Grab value
-  LoadValueDirectFixed(rl_index, r_index);  // Grab index
-
-  GenNullCheck(rl_array.s_reg_low, r_array, opt_flags);  // NPE?
-
-  // Store of null?
-  LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL);
-
-  // Get the array's class.
-  LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class);
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCanPutArrayElement), r_value,
-                          r_array_class, true);
-  // Redo LoadValues in case they didn't survive the call.
-  LoadValueDirectFixed(rl_array, r_array);  // Reload array
-  LoadValueDirectFixed(rl_index, r_index);  // Reload index
-  LoadValueDirectFixed(rl_src, r_value);  // Reload value
-  r_array_class = INVALID_REG;
-
-  // Branch here if value to be stored == null
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  null_value_check->target = target;
-
-  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
-  int reg_len = INVALID_REG;
-  if (needs_range_check) {
-    reg_len = TargetReg(kArg1);
-    LoadWordDisp(r_array, len_offset, reg_len);  // Get len
+  if (allocated_reg_ptr_temp) {
+    FreeTemp(reg_ptr);
   }
-  /* r_ptr -> array data */
-  int r_ptr = AllocTemp();
-  OpRegRegImm(kOpAdd, r_ptr, r_array, data_offset);
-  if (needs_range_check) {
-    GenRegRegCheck(kCondCs, r_index, reg_len, kThrowArrayBounds);
-  }
-  StoreBaseIndexed(r_ptr, r_index, r_value, scale, kWord);
-  FreeTemp(r_ptr);
-  FreeTemp(r_index);
-  if (!mir_graph_->IsConstantNullRef(rl_src)) {
-    MarkGCCard(r_value, r_array);
+  if (card_mark) {
+    MarkGCCard(rl_src.low_reg, rl_array.low_reg);
   }
 }
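The allocated_reg_ptr_temp flag ties the FreeTemp to whether this function actually allocated reg_ptr, instead of inferring ownership from how the array register was obtained. The ownership pattern in miniature (the pool below is a toy stand-in for the real register allocator):

    #include <bitset>

    static std::bitset<16> g_in_use;  // toy temp-register pool

    int AllocTemp() {
      for (int r = 0; r < 16; ++r) {
        if (!g_in_use[r]) { g_in_use[r] = true; return r; }
      }
      return -1;  // pool exhausted
    }
    void FreeTemp(int r) { g_in_use[r] = false; }

    // Reuse the caller's register when allowed; record whether we own it so
    // only registers allocated here are released afterwards.
    int AcquirePtrReg(int existing_reg, bool can_reuse, bool* allocated) {
      *allocated = !can_reuse;
      return can_reuse ? existing_reg : AllocTemp();
    }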
 
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 4ee5b23..0ee32d4 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -120,22 +120,21 @@
 }
 
 
-void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir) {
+void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
   DCHECK_EQ(cu_->instruction_set, kMips);
+  DCHECK(!lir->flags.use_def_invalid);
 
   // Mips-specific resource map setup here.
-  uint64_t flags = MipsMir2Lir::EncodingMap[lir->opcode].flags;
-
   if (flags & REG_DEF_SP) {
-    lir->def_mask |= ENCODE_MIPS_REG_SP;
+    lir->u.m.def_mask |= ENCODE_MIPS_REG_SP;
   }
 
   if (flags & REG_USE_SP) {
-    lir->use_mask |= ENCODE_MIPS_REG_SP;
+    lir->u.m.use_mask |= ENCODE_MIPS_REG_SP;
   }
 
   if (flags & REG_DEF_LR) {
-    lir->def_mask |= ENCODE_MIPS_REG_LR;
+    lir->u.m.def_mask |= ENCODE_MIPS_REG_LR;
   }
 }
 
@@ -269,8 +268,8 @@
     }
     /* Memory bits */
     if (mips_lir && (mask & ENCODE_DALVIK_REG)) {
-      sprintf(buf + strlen(buf), "dr%d%s", mips_lir->alias_info & 0xffff,
-              (mips_lir->alias_info & 0x80000000) ? "(+1)" : "");
+      sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(mips_lir->flags.alias_info),
+              DECODE_ALIAS_INFO_WIDE(mips_lir->flags.alias_info) ? "(+1)" : "");
     }
     if (mask & ENCODE_LITERAL) {
       strcat(buf, "lit ");
@@ -399,11 +398,6 @@
   return res;
 }
 
-MipsMir2Lir::RegisterInfo* MipsMir2Lir::GetRegInfo(int reg) {
-  return MIPS_FPREG(reg) ? &reg_pool_->FPRegs[reg & MIPS_FP_REG_MASK]
-            : &reg_pool_->core_regs[reg];
-}
-
 /* To be used when explicitly managing register use */
 void MipsMir2Lir::LockCallTemps() {
   LockTemp(rMIPS_ARG0);
@@ -559,14 +553,17 @@
 }
 
 uint64_t MipsMir2Lir::GetTargetInstFlags(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
   return MipsMir2Lir::EncodingMap[opcode].flags;
 }
 
 const char* MipsMir2Lir::GetTargetInstName(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
   return MipsMir2Lir::EncodingMap[opcode].name;
 }
 
 const char* MipsMir2Lir::GetTargetInstFmt(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
   return MipsMir2Lir::EncodingMap[opcode].fmt;
 }
 
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 440df2a..f293700 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -33,7 +33,12 @@
     p->def_end = NULL;
     if (p->pair) {
       p->pair = false;
-      Clobber(p->partner);
+      p = GetRegInfo(p->partner);
+      p->pair = false;
+      p->live = false;
+      p->s_reg = INVALID_SREG;
+      p->def_start = NULL;
+      p->def_end = NULL;
     }
   }
 }
@@ -53,7 +58,8 @@
   if ((opcode == kPseudoTargetLabel) || (opcode == kPseudoSafepointPC) ||
       (opcode == kPseudoExportedPC)) {
     // Always make labels scheduling barriers
-    insn->use_mask = insn->def_mask = ENCODE_ALL;
+    DCHECK(!insn->flags.use_def_invalid);
+    insn->u.m.use_mask = insn->u.m.def_mask = ENCODE_ALL;
   }
   return insn;
 }
@@ -63,7 +69,7 @@
  * operands.
  */
 inline LIR* Mir2Lir::NewLIR0(int opcode) {
-  DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & NO_OPERAND))
+  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & NO_OPERAND))
       << GetTargetInstName(opcode) << " " << opcode << " "
       << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
       << current_dalvik_offset_;
@@ -73,7 +79,7 @@
 }
 
 inline LIR* Mir2Lir::NewLIR1(int opcode, int dest) {
-  DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_UNARY_OP))
+  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_UNARY_OP))
       << GetTargetInstName(opcode) << " " << opcode << " "
       << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
       << current_dalvik_offset_;
@@ -83,7 +89,7 @@
 }
 
 inline LIR* Mir2Lir::NewLIR2(int opcode, int dest, int src1) {
-  DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_BINARY_OP))
+  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_BINARY_OP))
       << GetTargetInstName(opcode) << " " << opcode << " "
       << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
       << current_dalvik_offset_;
@@ -93,7 +99,7 @@
 }
 
 inline LIR* Mir2Lir::NewLIR3(int opcode, int dest, int src1, int src2) {
-  DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_TERTIARY_OP))
+  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_TERTIARY_OP))
       << GetTargetInstName(opcode) << " " << opcode << " "
       << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
       << current_dalvik_offset_;
@@ -103,7 +109,7 @@
 }
 
 inline LIR* Mir2Lir::NewLIR4(int opcode, int dest, int src1, int src2, int info) {
-  DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_QUAD_OP))
+  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_QUAD_OP))
       << GetTargetInstName(opcode) << " " << opcode << " "
       << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
       << current_dalvik_offset_;
@@ -114,7 +120,7 @@
 
 inline LIR* Mir2Lir::NewLIR5(int opcode, int dest, int src1, int src2, int info1,
                              int info2) {
-  DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_QUIN_OP))
+  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_QUIN_OP))
       << GetTargetInstName(opcode) << " " << opcode << " "
       << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
       << current_dalvik_offset_;
@@ -136,20 +142,23 @@
 inline void Mir2Lir::SetupResourceMasks(LIR* lir) {
   int opcode = lir->opcode;
 
-  if (opcode <= 0) {
-    lir->use_mask = lir->def_mask = 0;
+  if (IsPseudoLirOp(opcode)) {
+    if (opcode != kPseudoBarrier) {
+      lir->flags.fixup = kFixupLabel;
+    }
     return;
   }
 
   uint64_t flags = GetTargetInstFlags(opcode);
 
   if (flags & NEEDS_FIXUP) {
-    lir->flags.pcRelFixup = true;
+    // Note: target-specific setup may specialize the fixup kind.
+    lir->flags.fixup = kFixupLabel;
   }
 
   /* Get the starting size of the instruction's template */
   lir->flags.size = GetInsnSize(lir);
-
+  estimated_native_code_size_ += lir->flags.size;
   /* Set up the mask for resources that are updated */
   if (flags & (IS_LOAD | IS_STORE)) {
     /* Default to heap - will catch specialized classes later */
@@ -161,39 +170,49 @@
    * turn will trash everything.
    */
   if (flags & IS_BRANCH) {
-    lir->def_mask = lir->use_mask = ENCODE_ALL;
+    lir->u.m.def_mask = lir->u.m.use_mask = ENCODE_ALL;
     return;
   }
 
   if (flags & REG_DEF0) {
-    SetupRegMask(&lir->def_mask, lir->operands[0]);
+    SetupRegMask(&lir->u.m.def_mask, lir->operands[0]);
   }
 
   if (flags & REG_DEF1) {
-    SetupRegMask(&lir->def_mask, lir->operands[1]);
+    SetupRegMask(&lir->u.m.def_mask, lir->operands[1]);
   }
 
+  if (flags & REG_USE0) {
+    SetupRegMask(&lir->u.m.use_mask, lir->operands[0]);
+  }
+
+  if (flags & REG_USE1) {
+    SetupRegMask(&lir->u.m.use_mask, lir->operands[1]);
+  }
+
+  if (flags & REG_USE2) {
+    SetupRegMask(&lir->u.m.use_mask, lir->operands[2]);
+  }
+
+  if (flags & REG_USE3) {
+    SetupRegMask(&lir->u.m.use_mask, lir->operands[3]);
+  }
 
   if (flags & SETS_CCODES) {
-    lir->def_mask |= ENCODE_CCODE;
-  }
-
-  if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) {
-    int i;
-
-    for (i = 0; i < 4; i++) {
-      if (flags & (1 << (kRegUse0 + i))) {
-        SetupRegMask(&lir->use_mask, lir->operands[i]);
-      }
-    }
+    lir->u.m.def_mask |= ENCODE_CCODE;
   }
 
   if (flags & USES_CCODES) {
-    lir->use_mask |= ENCODE_CCODE;
+    lir->u.m.use_mask |= ENCODE_CCODE;
   }
 
   // Handle target-specific actions
-  SetupTargetResourceMasks(lir);
+  SetupTargetResourceMasks(lir, flags);
+}
+
+inline art::Mir2Lir::RegisterInfo* Mir2Lir::GetRegInfo(int reg) {
+  DCHECK(reginfo_map_.Get(reg) != NULL);
+  return reginfo_map_.Get(reg);
 }
 
 }  // namespace art
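
A minimal sketch of the flag-driven mask setup above, using made-up flag bit values and a one-bit-per-register mask rather than the real resource encodings:

    #include <cassert>
    #include <cstdint>

    constexpr uint64_t kEncodeAll = ~0ULL;  // ENCODE_ALL stand-in
    enum : uint64_t { IS_BRANCH = 1, REG_DEF0 = 2, REG_USE1 = 4 };  // made-up bits

    struct Masks { uint64_t use = 0, def = 0; };

    static void SetupRegMask(uint64_t* mask, int reg) { *mask |= 1ULL << reg; }

    static Masks Setup(uint64_t flags, const int* operands) {
      Masks m;
      if (flags & IS_BRANCH) {  // branches are full scheduling barriers
        m.use = m.def = kEncodeAll;
        return m;
      }
      if (flags & REG_DEF0) SetupRegMask(&m.def, operands[0]);
      if (flags & REG_USE1) SetupRegMask(&m.use, operands[1]);
      return m;
    }

    int main() {
      const int ops[2] = {3, 7};
      Masks m = Setup(REG_DEF0 | REG_USE1, ops);
      assert(m.def == (1ULL << 3) && m.use == (1ULL << 7));
      return 0;
    }
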
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index c41feb1..2b26c3d 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -18,6 +18,7 @@
 #include "dex/dataflow_iterator-inl.h"
 #include "mir_to_lir-inl.h"
 #include "object_utils.h"
+#include "thread-inl.h"
 
 namespace art {
 
@@ -337,22 +338,35 @@
       GenArrayGet(opt_flags, kSignedHalf, rl_src[0], rl_src[1], rl_dest, 1);
       break;
     case Instruction::APUT_WIDE:
-      GenArrayPut(opt_flags, kLong, rl_src[1], rl_src[2], rl_src[0], 3);
+      GenArrayPut(opt_flags, kLong, rl_src[1], rl_src[2], rl_src[0], 3, false);
       break;
     case Instruction::APUT:
-      GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2);
+      GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2, false);
       break;
-    case Instruction::APUT_OBJECT:
-      GenArrayObjPut(opt_flags, rl_src[1], rl_src[2], rl_src[0], 2);
+    case Instruction::APUT_OBJECT: {
+      bool is_null = mir_graph_->IsConstantNullRef(rl_src[0]);
+      bool is_safe = is_null;  // Always safe to store null.
+      if (!is_safe) {
+        // Check safety from verifier type information.
+        const MethodReference mr(cu_->dex_file, cu_->method_idx);
+        is_safe = cu_->compiler_driver->IsSafeCast(mr, mir->offset);
+      }
+      if (is_null || is_safe) {
+        // A store of constant null or of a verifier-proven safe reference doesn't require an
+        // assignability test and can be generated inline; only non-null stores need a card mark.
+        GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2, !is_null);
+      } else {
+        GenArrayObjPut(opt_flags, rl_src[1], rl_src[2], rl_src[0]);
+      }
       break;
+    }
     case Instruction::APUT_SHORT:
     case Instruction::APUT_CHAR:
-      GenArrayPut(opt_flags, kUnsignedHalf, rl_src[1], rl_src[2], rl_src[0], 1);
+      GenArrayPut(opt_flags, kUnsignedHalf, rl_src[1], rl_src[2], rl_src[0], 1, false);
       break;
     case Instruction::APUT_BYTE:
     case Instruction::APUT_BOOLEAN:
-      GenArrayPut(opt_flags, kUnsignedByte, rl_src[1], rl_src[2],
-            rl_src[0], 0);
+      GenArrayPut(opt_flags, kUnsignedByte, rl_src[1], rl_src[2], rl_src[0], 0, false);
       break;
 
     case Instruction::IGET_OBJECT:
@@ -696,6 +710,7 @@
 
   // Insert the block label.
   block_label_list_[block_id].opcode = kPseudoNormalBlockLabel;
+  block_label_list_[block_id].flags.fixup = kFixupLabel;
   AppendLIR(&block_label_list_[block_id]);
 
   LIR* head_lir = NULL;
@@ -706,16 +721,15 @@
   }
 
   // Free temp registers and reset redundant store tracking.
-  ResetRegPool();
-  ResetDefTracking();
-
   ClobberAllRegs();
 
   if (bb->block_type == kEntryBlock) {
+    ResetRegPool();
     int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
     GenEntrySequence(&mir_graph_->reg_location_[start_vreg],
                          mir_graph_->reg_location_[mir_graph_->GetMethodSReg()]);
   } else if (bb->block_type == kExitBlock) {
+    ResetRegPool();
     GenExitSequence();
   }
 
@@ -736,17 +750,18 @@
 
     current_dalvik_offset_ = mir->offset;
     int opcode = mir->dalvikInsn.opcode;
-    LIR* boundary_lir;
 
     // Mark the beginning of a Dalvik instruction for line tracking.
-    char* inst_str = cu_->verbose ?
-       mir_graph_->GetDalvikDisassembly(mir) : NULL;
-    boundary_lir = MarkBoundary(mir->offset, inst_str);
+    if (cu_->verbose) {
+       char* inst_str = mir_graph_->GetDalvikDisassembly(mir);
+       MarkBoundary(mir->offset, inst_str);
+    }
     // Remember the first LIR for this block.
     if (head_lir == NULL) {
-      head_lir = boundary_lir;
-      // Set the first boundary_lir as a scheduling barrier.
-      head_lir->def_mask = ENCODE_ALL;
+      head_lir = &block_label_list_[bb->id];
+      // Set the first label as a scheduling barrier.
+      DCHECK(!head_lir->flags.use_def_invalid);
+      head_lir->u.m.def_mask = ENCODE_ALL;
     }
 
     if (opcode == kMirOpCheck) {
@@ -771,11 +786,6 @@
   if (head_lir) {
     // Eliminate redundant loads/stores and delay stores into later slots.
     ApplyLocalOptimizations(head_lir, last_lir_insn_);
-
-    // Generate an unconditional branch to the fallthrough block.
-    if (bb->fall_through) {
-      OpUnconditionalBranch(&block_label_list_[bb->fall_through->id]);
-    }
   }
   return false;
 }
@@ -815,9 +825,19 @@
       static_cast<LIR*>(arena_->Alloc(sizeof(LIR) * mir_graph_->GetNumBlocks(),
                                       ArenaAllocator::kAllocLIR));
 
-  PreOrderDfsIterator iter(mir_graph_, false /* not iterative */);
-  for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
-    MethodBlockCodeGen(bb);
+  PreOrderDfsIterator iter(mir_graph_);
+  BasicBlock* curr_bb = iter.Next();
+  BasicBlock* next_bb = iter.Next();
+  while (curr_bb != NULL) {
+    MethodBlockCodeGen(curr_bb);
+    // If the fall_through block is no longer laid out consecutively, drop in a branch.
+    if ((curr_bb->fall_through != NULL) && (curr_bb->fall_through != next_bb)) {
+      OpUnconditionalBranch(&block_label_list_[curr_bb->fall_through->id]);
+    }
+    curr_bb = next_bb;
+    do {
+      next_bb = iter.Next();
+    } while ((next_bb != NULL) && (next_bb->block_type == kDead));
   }
 
   HandleSuspendLaunchPads();
@@ -825,10 +845,6 @@
   HandleThrowLaunchPads();
 
   HandleIntrinsicLaunchPads();
-
-  if (!(cu_->disable_opt & (1 << kSafeOptimizations))) {
-    RemoveRedundantBranches();
-  }
 }
 
 }  // namespace art
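
A standalone sketch of the new layout-driven branch elision, with a toy block list instead of the real iterator: a branch is emitted only when the fall-through target is not the next block actually laid out, and dead blocks are skipped.

    #include <cstdio>
    #include <vector>

    struct Block { int id; int fall_through; bool dead; };  // fall_through < 0: none

    int main() {
      // Toy layout order: block 2 is dead; block 3 falls through to block 0.
      std::vector<Block> layout = {{0, 1, false}, {1, 3, false}, {2, -1, true}, {3, 0, false}};
      for (size_t i = 0; i < layout.size(); ++i) {
        if (layout[i].dead) continue;
        size_t next = i + 1;
        while (next < layout.size() && layout[next].dead) ++next;  // skip kDead blocks
        const int ft = layout[i].fall_through;
        if (ft >= 0 && (next == layout.size() || layout[next].id != ft)) {
          std::printf("block %d: emit unconditional branch to %d\n", layout[i].id, ft);
        }
      }
      return 0;
    }
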
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index a37ebd1..5df2672 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -95,6 +95,7 @@
 struct CallInfo;
 struct CompilationUnit;
 struct MIR;
+struct LIR;
 struct RegLocation;
 struct RegisterInfo;
 class MIRGraph;
@@ -107,24 +108,36 @@
 
 typedef std::vector<uint8_t> CodeBuffer;
 
+struct UseDefMasks {
+  uint64_t use_mask;        // Resource mask for use.
+  uint64_t def_mask;        // Resource mask for def.
+};
+
+struct AssemblyInfo {
+  LIR* pcrel_next;           // Chain of LIR nodes needing pc relative fixups.
+  uint8_t bytes[16];         // Encoded instruction bytes.
+};
 
 struct LIR {
   int offset;               // Offset of this instruction.
-  int dalvik_offset;        // Offset of Dalvik opcode.
+  uint16_t dalvik_offset;   // Offset of Dalvik opcode in code units (16-bit words).
+  int16_t opcode;
   LIR* next;
   LIR* prev;
   LIR* target;
-  int opcode;
-  int operands[5];          // [0..4] = [dest, src1, src2, extra, extra2].
   struct {
-    bool is_nop:1;          // LIR is optimized away.
-    bool pcRelFixup:1;      // May need pc-relative fixup.
-    unsigned int size:5;    // Note: size is in bytes.
-    unsigned int unused:25;
+    unsigned int alias_info:17;  // For Dalvik register disambiguation.
+    bool is_nop:1;               // LIR is optimized away.
+    unsigned int size:4;         // Note: size of encoded instruction is in bytes.
+    bool use_def_invalid:1;      // If true, masks should not be used.
+    unsigned int generation:1;   // Used to track visitation state during fixup pass.
+    unsigned int fixup:8;        // Fixup kind.
   } flags;
-  int alias_info;           // For Dalvik register & litpool disambiguation.
-  uint64_t use_mask;        // Resource mask for use.
-  uint64_t def_mask;        // Resource mask for def.
+  union {
+    UseDefMasks m;          // Use & Def masks used during optimization.
+    AssemblyInfo a;         // Instruction encoding used during assembly phase.
+  } u;
+  int operands[5];          // [0..4] = [dest, src1, src2, extra, extra2].
 };
 
 // Target-specific initialization.
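
A minimal sketch of the phase overlay behind the new union u, with simplified types: the optimizer's use/def masks and the assembler's encoding bytes share storage, and use_def_invalid records when the masks may no longer be read.

    #include <cassert>
    #include <cstdint>

    struct UseDefMasks { uint64_t use_mask; uint64_t def_mask; };
    struct AssemblyInfo { void* pcrel_next; uint8_t bytes[16]; };

    struct Lir {
      bool use_def_invalid = false;
      union {
        UseDefMasks m;   // valid while optimizing
        AssemblyInfo a;  // reuses the same bytes while assembling
      } u;
    };

    int main() {
      Lir lir;
      lir.u.m.use_mask = ~0ULL;     // optimization phase writes masks
      // Switching phases clobbers the masks, so flag them invalid first.
      lir.use_def_invalid = true;
      lir.u.a.bytes[0] = 0x90;
      assert(lir.use_def_invalid);  // readers must check before touching u.m
      return 0;
    }
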
@@ -141,7 +154,7 @@
 
 // Defines for alias_info (tracks Dalvik register references).
 #define DECODE_ALIAS_INFO_REG(X)        (X & 0xffff)
-#define DECODE_ALIAS_INFO_WIDE_FLAG     (0x80000000)
+#define DECODE_ALIAS_INFO_WIDE_FLAG     (0x10000)
 #define DECODE_ALIAS_INFO_WIDE(X)       ((X & DECODE_ALIAS_INFO_WIDE_FLAG) ? 1 : 0)
 #define ENCODE_ALIAS_INFO(REG, ISWIDE)  (REG | (ISWIDE ? DECODE_ALIAS_INFO_WIDE_FLAG : 0))
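
The wide flag drops from bit 31 to bit 16 because alias_info now lives in a 17-bit bitfield. A small self-checking sketch of the packing implied by these macros:

    #include <cassert>
    #include <cstdint>

    // kWideFlag mirrors DECODE_ALIAS_INFO_WIDE_FLAG: bit 16, just above the vreg.
    constexpr uint32_t kWideFlag = 0x10000;

    constexpr uint32_t Encode(uint32_t reg, bool is_wide) {
      return reg | (is_wide ? kWideFlag : 0);
    }

    int main() {
      const uint32_t info = Encode(42, true);
      assert((info & 0xffff) == 42);              // DECODE_ALIAS_INFO_REG
      assert((info & kWideFlag) != 0);            // DECODE_ALIAS_INFO_WIDE
      assert(Encode(0xffff, true) < (1u << 17));  // fits the 17-bit alias_info field
      return 0;
    }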
 
@@ -158,13 +171,16 @@
 #define ENCODE_ALL              (~0ULL)
 #define ENCODE_MEM              (ENCODE_DALVIK_REG | ENCODE_LITERAL | \
                                  ENCODE_HEAP_REF | ENCODE_MUST_NOT_ALIAS)
+
+// Mask to denote sreg as the start of a double.  Must not interfere with low 16 bits.
+#define STARTING_DOUBLE_SREG 0x10000
+
 // TODO: replace these macros
 #define SLOW_FIELD_PATH (cu_->enable_debug & (1 << kDebugSlowFieldPath))
 #define SLOW_INVOKE_PATH (cu_->enable_debug & (1 << kDebugSlowInvokePath))
 #define SLOW_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowStringPath))
 #define SLOW_TYPE_PATH (cu_->enable_debug & (1 << kDebugSlowTypePath))
 #define EXERCISE_SLOWEST_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowestStringPath))
-#define is_pseudo_opcode(opcode) (static_cast<int>(opcode) < 0)
 
 class Mir2Lir : public Backend {
   public:
@@ -187,7 +203,6 @@
     struct RefCounts {
       int count;
       int s_reg;
-      bool double_start;   // Starting v_reg for a double
     };
 
     /*
@@ -241,6 +256,10 @@
       return code_buffer_.size() / sizeof(code_buffer_[0]);
     }
 
+    bool IsPseudoLirOp(int opcode) {
+      return (opcode < 0);
+    }
+
     // Shared by all targets - implemented in codegen_util.cc
     void AppendLIR(LIR* lir);
     void InsertLIRBefore(LIR* current_lir, LIR* new_lir);
@@ -250,9 +269,8 @@
     virtual void Materialize();
     virtual CompiledMethod* GetCompiledMethod();
     void MarkSafepointPC(LIR* inst);
-    bool FastInstance(uint32_t field_idx, int& field_offset, bool& is_volatile, bool is_put);
+    bool FastInstance(uint32_t field_idx, bool is_put, int* field_offset, bool* is_volatile);
     void SetupResourceMasks(LIR* lir);
-    void AssembleLIR();
     void SetMemRefType(LIR* lir, bool is_load, int mem_type);
     void AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit);
     void SetupRegMask(uint64_t* mask, int reg);
@@ -274,13 +292,14 @@
     void ProcessSwitchTables();
     void DumpSparseSwitchTable(const uint16_t* table);
     void DumpPackedSwitchTable(const uint16_t* table);
-    LIR* MarkBoundary(int offset, const char* inst_str);
+    void MarkBoundary(int offset, const char* inst_str);
     void NopLIR(LIR* lir);
+    void UnlinkLIR(LIR* lir);
     bool EvaluateBranch(Instruction::Code opcode, int src1, int src2);
     bool IsInexpensiveConstant(RegLocation rl_src);
     ConditionCode FlipComparisonOrder(ConditionCode before);
-    void DumpMappingTable(const char* table_name, const std::string& descriptor,
-                          const std::string& name, const std::string& signature,
+    void DumpMappingTable(const char* table_name, const char* descriptor,
+                          const char* name, const Signature& signature,
                           const std::vector<uint32_t>& v);
     void InstallLiteralPools();
     void InstallSwitchTables();
@@ -291,8 +310,6 @@
     int AssignLiteralOffset(int offset);
     int AssignSwitchTablesOffset(int offset);
     int AssignFillArrayDataOffset(int offset);
-    int AssignInsnOffsets();
-    void AssignOffsets();
     LIR* InsertCaseLabel(int vaddr, int keyVal);
     void MarkPackedCaseLabels(Mir2Lir::SwitchTable *tab_rec);
     void MarkSparseCaseLabels(Mir2Lir::SwitchTable *tab_rec);
@@ -302,7 +319,6 @@
     void ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir);
     void ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir);
     void ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir);
-    void RemoveRedundantBranches();
 
     // Shared by all targets - implemented in ralloc_util.cc
     int GetSRegHi(int lowSreg);
@@ -324,11 +340,9 @@
     void RecordCorePromotion(int reg, int s_reg);
     int AllocPreservedCoreReg(int s_reg);
     void RecordFpPromotion(int reg, int s_reg);
-    int AllocPreservedSingle(int s_reg, bool even);
+    int AllocPreservedSingle(int s_reg);
     int AllocPreservedDouble(int s_reg);
-    int AllocPreservedFPReg(int s_reg, bool double_start);
-    int AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp,
-                      bool required);
+    int AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, bool required);
     int AllocTempDouble();
     int AllocFreeTemp();
     int AllocTemp();
@@ -367,13 +381,14 @@
     RegLocation UpdateRawLoc(RegLocation loc);
     RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
     RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
-    void CountRefs(RefCounts* core_counts, RefCounts* fp_counts);
+    void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs);
     void DumpCounts(const RefCounts* arr, int size, const char* msg);
     void DoPromotion();
     int VRegOffset(int v_reg);
     int SRegOffset(int s_reg);
     RegLocation GetReturnWide(bool is_double);
     RegLocation GetReturn(bool is_float);
+    RegisterInfo* GetRegInfo(int reg);
 
     // Shared by all targets - implemented in gen_common.cc.
     bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
@@ -407,6 +422,9 @@
                  RegLocation rl_dest, RegLocation rl_obj, bool is_long_or_double, bool is_object);
     void GenIPut(uint32_t field_idx, int opt_flags, OpSize size,
                  RegLocation rl_src, RegLocation rl_obj, bool is_long_or_double, bool is_object);
+    void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index,
+                        RegLocation rl_src);
+
     void GenConstClass(uint32_t type_idx, RegLocation rl_dest);
     void GenConstString(uint32_t string_idx, RegLocation rl_dest);
     void GenNewInstance(uint32_t type_idx, RegLocation rl_dest);
@@ -463,6 +481,10 @@
     void CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset helper_offset,
                                                     int arg0, RegLocation arg1, RegLocation arg2,
                                                     bool safepoint_pc);
+    void CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset helper_offset,
+                                                            RegLocation arg0, RegLocation arg1,
+                                                            RegLocation arg2,
+                                                            bool safepoint_pc);
     void GenInvoke(CallInfo* info);
     void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
     int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
@@ -550,7 +572,6 @@
     virtual int AllocTypedTempPair(bool fp_hint, int reg_class) = 0;
     virtual int S2d(int low_reg, int high_reg) = 0;
     virtual int TargetReg(SpecialTargetRegister reg) = 0;
-    virtual RegisterInfo* GetRegInfo(int reg) = 0;
     virtual RegLocation GetReturnAlt() = 0;
     virtual RegLocation GetReturnWideAlt() = 0;
     virtual RegLocation LocCReturn() = 0;
@@ -570,9 +591,9 @@
     virtual void CompilerInitializeRegAlloc() = 0;
 
     // Required for target - miscellaneous.
-    virtual AssemblerStatus AssembleInstructions(uintptr_t start_addr) = 0;
+    virtual void AssembleLIR() = 0;
     virtual void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix) = 0;
-    virtual void SetupTargetResourceMasks(LIR* lir) = 0;
+    virtual void SetupTargetResourceMasks(LIR* lir, uint64_t flags) = 0;
     virtual const char* GetTargetInstFmt(int opcode) = 0;
     virtual const char* GetTargetInstName(int opcode) = 0;
     virtual std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) = 0;
@@ -628,8 +649,6 @@
     virtual void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) = 0;
     virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0;
     virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0;
-    virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src) = 0;
-    virtual void GenMonitorExit(int opt_flags, RegLocation rl_src) = 0;
     virtual void GenMoveException(RegLocation rl_dest) = 0;
     virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                                RegLocation rl_result, int lit, int first_bit,
@@ -642,12 +661,11 @@
                                  RegLocation rl_src) = 0;
     virtual void GenSpecialCase(BasicBlock* bb, MIR* mir,
                                 SpecialCaseHandler special_case) = 0;
-    virtual void GenArrayObjPut(int opt_flags, RegLocation rl_array,
-                                RegLocation rl_index, RegLocation rl_src, int scale) = 0;
     virtual void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_dest, int scale) = 0;
     virtual void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                     RegLocation rl_index, RegLocation rl_src, int scale) = 0;
+                             RegLocation rl_index, RegLocation rl_src, int scale,
+                             bool card_mark) = 0;
     virtual void GenShiftImmOpLong(Instruction::Code opcode,
                                    RegLocation rl_dest, RegLocation rl_src1,
                                    RegLocation rl_shift) = 0;
@@ -688,6 +706,10 @@
     virtual bool InexpensiveConstantLong(int64_t value) = 0;
     virtual bool InexpensiveConstantDouble(int64_t value) = 0;
 
+    // May be optimized by targets.
+    virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src);
+    virtual void GenMonitorExit(int opt_flags, RegLocation rl_src);
+
     // Temp workaround
     void Workaround7250540(RegLocation rl_dest, int value);
 
@@ -718,6 +740,7 @@
     LIR* literal_list_;                        // Constants.
     LIR* method_literal_list_;                 // Method literals requiring patching.
     LIR* code_literal_list_;                   // Code literals requiring patching.
+    LIR* first_fixup_;                         // Doubly-linked list of LIR nodes requiring fixups.
 
   protected:
     CompilationUnit* const cu_;
@@ -727,7 +750,8 @@
     GrowableArray<LIR*> throw_launchpads_;
     GrowableArray<LIR*> suspend_launchpads_;
     GrowableArray<LIR*> intrinsic_launchpads_;
-    SafeMap<unsigned int, LIR*> boundary_map_;  // boundary lookup cache.
+    GrowableArray<RegisterInfo*> tempreg_info_;
+    GrowableArray<RegisterInfo*> reginfo_map_;
     /*
      * Holds mapping from native PC to dex PC for safepoints where we may deoptimize.
      * Native PC is on the return address of the safepointed operation.  Dex PC is for
@@ -739,6 +763,7 @@
      * immediately precede the instruction.
      */
     std::vector<uint32_t> dex2pc_mapping_table_;
+    int current_code_offset_;             // Working byte offset of machine instructions.
     int data_offset_;                     // starting offset of literal pool.
     int total_size_;                      // header + code size.
     LIR* block_label_list_;
@@ -753,6 +778,7 @@
      * The low-level LIR creation utilities will pull it from here.  Rework this.
      */
     int current_dalvik_offset_;
+    int estimated_native_code_size_;     // Just an estimate; used to reserve code_buffer_ size.
     RegisterPool* reg_pool_;
     /*
      * Sanity checking for the register temp tracking.  The same ssa
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 71b74a4..7927ff9 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -28,13 +28,9 @@
  * live until it is either explicitly killed or reallocated.
  */
 void Mir2Lir::ResetRegPool() {
-  for (int i = 0; i < reg_pool_->num_core_regs; i++) {
-    if (reg_pool_->core_regs[i].is_temp)
-      reg_pool_->core_regs[i].in_use = false;
-  }
-  for (int i = 0; i < reg_pool_->num_fp_regs; i++) {
-    if (reg_pool_->FPRegs[i].is_temp)
-      reg_pool_->FPRegs[i].in_use = false;
+  GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_);
+  for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) {
+    info->in_use = false;
   }
   // Reset temp tracking sanity check.
   if (kIsDebugBuild) {
@@ -48,13 +44,21 @@
   */
 void Mir2Lir::CompilerInitPool(RegisterInfo* regs, int* reg_nums, int num) {
   for (int i = 0; i < num; i++) {
-    regs[i].reg = reg_nums[i];
+    uint32_t reg_number = reg_nums[i];
+    regs[i].reg = reg_number;
     regs[i].in_use = false;
     regs[i].is_temp = false;
     regs[i].pair = false;
     regs[i].live = false;
     regs[i].dirty = false;
     regs[i].s_reg = INVALID_SREG;
+    size_t map_size = reginfo_map_.Size();
+    if (reg_number >= map_size) {
+      for (uint32_t j = 0; j < ((reg_number - map_size) + 1); j++) {
+        reginfo_map_.Insert(NULL);
+      }
+    }
+    reginfo_map_.Put(reg_number, &regs[i]);
   }
 }
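
The padding loop above builds a direct map from physical register number to its RegisterInfo*, which is what lets FreeTemp, IsLive, and LockTemp below drop their linear scans over the core and fp pools. A sketch of the same idea with std::vector standing in for GrowableArray:

    #include <cassert>
    #include <vector>

    struct RegisterInfo { int reg; bool live; };

    int main() {
      std::vector<RegisterInfo*> reginfo_map;  // stand-in for reginfo_map_
      RegisterInfo r5 = {5, true};
      if (r5.reg >= static_cast<int>(reginfo_map.size())) {
        reginfo_map.resize(r5.reg + 1, nullptr);  // pad gaps with null, as above
      }
      reginfo_map[r5.reg] = &r5;
      RegisterInfo* p = reginfo_map[5];  // GetRegInfo(5): O(1), no pool scan
      assert(p != nullptr && p->live);
      return 0;
    }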
 
@@ -170,17 +174,12 @@
   promotion_map_[p_map_idx].FpReg = reg;
 }
 
-/*
- * Reserve a callee-save fp single register.  Try to fullfill request for
- * even/odd  allocation, but go ahead and allocate anything if not
- * available.  If nothing's available, return -1.
- */
-int Mir2Lir::AllocPreservedSingle(int s_reg, bool even) {
-  int res = -1;
+// Reserve a callee-save fp single register.
+int Mir2Lir::AllocPreservedSingle(int s_reg) {
+  int res = -1;  // Return code if none available.
   RegisterInfo* FPRegs = reg_pool_->FPRegs;
   for (int i = 0; i < reg_pool_->num_fp_regs; i++) {
-    if (!FPRegs[i].is_temp && !FPRegs[i].in_use &&
-      ((FPRegs[i].reg & 0x1) == 0) == even) {
+    if (!FPRegs[i].is_temp && !FPRegs[i].in_use) {
       res = FPRegs[i].reg;
       RecordFpPromotion(res, s_reg);
       break;
@@ -246,26 +245,6 @@
   return res;
 }
 
-
-/*
- * Reserve a callee-save fp register.   If this register can be used
- * as the first of a double, attempt to allocate an even pair of fp
- * single regs (but if can't still attempt to allocate a single, preferring
- * first to allocate an odd register.
- */
-int Mir2Lir::AllocPreservedFPReg(int s_reg, bool double_start) {
-  int res = -1;
-  if (double_start) {
-    res = AllocPreservedDouble(s_reg);
-  }
-  if (res == -1) {
-    res = AllocPreservedSingle(s_reg, false /* try odd # */);
-  }
-  if (res == -1)
-    res = AllocPreservedSingle(s_reg, true /* try even # */);
-  return res;
-}
-
 int Mir2Lir::AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp,
                            bool required) {
   int next = *next_temp;
@@ -379,7 +358,7 @@
   if (s_reg == -1)
     return NULL;
   for (int i = 0; i < num_regs; i++) {
-    if (p[i].live && (p[i].s_reg == s_reg)) {
+    if ((p[i].s_reg == s_reg) && p[i].live) {
       if (p[i].is_temp)
         p[i].in_use = true;
       return &p[i];
@@ -412,47 +391,16 @@
 }
 
 void Mir2Lir::FreeTemp(int reg) {
-  RegisterInfo* p = reg_pool_->core_regs;
-  int num_regs = reg_pool_->num_core_regs;
-  for (int i = 0; i< num_regs; i++) {
-    if (p[i].reg == reg) {
-      if (p[i].is_temp) {
-        p[i].in_use = false;
-      }
-      p[i].pair = false;
-      return;
-    }
+  RegisterInfo* p = GetRegInfo(reg);
+  if (p->is_temp) {
+    p->in_use = false;
   }
-  p = reg_pool_->FPRegs;
-  num_regs = reg_pool_->num_fp_regs;
-  for (int i = 0; i< num_regs; i++) {
-    if (p[i].reg == reg) {
-      if (p[i].is_temp) {
-        p[i].in_use = false;
-      }
-      p[i].pair = false;
-      return;
-    }
-  }
-  LOG(FATAL) << "Tried to free a non-existant temp: r" << reg;
+  p->pair = false;
 }
 
 Mir2Lir::RegisterInfo* Mir2Lir::IsLive(int reg) {
-  RegisterInfo* p = reg_pool_->core_regs;
-  int num_regs = reg_pool_->num_core_regs;
-  for (int i = 0; i< num_regs; i++) {
-    if (p[i].reg == reg) {
-      return p[i].live ? &p[i] : NULL;
-    }
-  }
-  p = reg_pool_->FPRegs;
-  num_regs = reg_pool_->num_fp_regs;
-  for (int i = 0; i< num_regs; i++) {
-    if (p[i].reg == reg) {
-      return p[i].live ? &p[i] : NULL;
-    }
-  }
-  return NULL;
+  RegisterInfo* p = GetRegInfo(reg);
+  return p->live ? p : NULL;
 }
 
 Mir2Lir::RegisterInfo* Mir2Lir::IsTemp(int reg) {
@@ -476,27 +424,10 @@
  * allocated.  Use with caution.
  */
 void Mir2Lir::LockTemp(int reg) {
-  RegisterInfo* p = reg_pool_->core_regs;
-  int num_regs = reg_pool_->num_core_regs;
-  for (int i = 0; i< num_regs; i++) {
-    if (p[i].reg == reg) {
-      DCHECK(p[i].is_temp);
-      p[i].in_use = true;
-      p[i].live = false;
-      return;
-    }
-  }
-  p = reg_pool_->FPRegs;
-  num_regs = reg_pool_->num_fp_regs;
-  for (int i = 0; i< num_regs; i++) {
-    if (p[i].reg == reg) {
-      DCHECK(p[i].is_temp);
-      p[i].in_use = true;
-      p[i].live = false;
-      return;
-    }
-  }
-  LOG(FATAL) << "Tried to lock a non-existant temp: r" << reg;
+  RegisterInfo* p = GetRegInfo(reg);
+  DCHECK(p->is_temp);
+  p->in_use = true;
+  p->live = false;
 }
 
 void Mir2Lir::ResetDef(int reg) {
@@ -599,11 +530,13 @@
 }
 
 void Mir2Lir::ClobberAllRegs() {
-  for (int i = 0; i< reg_pool_->num_core_regs; i++) {
-    ClobberBody(&reg_pool_->core_regs[i]);
-  }
-  for (int i = 0; i< reg_pool_->num_fp_regs; i++) {
-    ClobberBody(&reg_pool_->FPRegs[i]);
+  GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_);
+  for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) {
+    info->live = false;
+    info->s_reg = INVALID_SREG;
+    info->def_start = NULL;
+    info->def_end = NULL;
+    info->pair = false;
   }
 }
 
@@ -659,11 +592,13 @@
 
 void Mir2Lir::MarkTemp(int reg) {
   RegisterInfo* info = GetRegInfo(reg);
+  tempreg_info_.Insert(info);
   info->is_temp = true;
 }
 
 void Mir2Lir::UnmarkTemp(int reg) {
   RegisterInfo* info = GetRegInfo(reg);
+  tempreg_info_.Delete(info);
   info->is_temp = false;
 }
 
@@ -912,18 +847,22 @@
 }
 
 /* USE SSA names to count references of base Dalvik v_regs. */
-void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts) {
+void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
   for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) {
     RegLocation loc = mir_graph_->reg_location_[i];
     RefCounts* counts = loc.fp ? fp_counts : core_counts;
     int p_map_idx = SRegToPMap(loc.s_reg_low);
-    // Don't count easily regenerated immediates
-    if (loc.fp || !IsInexpensiveConstant(loc)) {
+    if (loc.fp) {
+      if (loc.wide) {
+        // Treat doubles as a unit, using upper half of fp_counts array.
+        counts[p_map_idx + num_regs].count += mir_graph_->GetUseCount(i);
+        i++;
+      } else {
+        counts[p_map_idx].count += mir_graph_->GetUseCount(i);
+      }
+    } else if (!IsInexpensiveConstant(loc)) {
       counts[p_map_idx].count += mir_graph_->GetUseCount(i);
     }
-    if (loc.wide && loc.fp && !loc.high_word) {
-      counts[p_map_idx].double_start = true;
-    }
   }
 }
 
@@ -942,7 +881,11 @@
 void Mir2Lir::DumpCounts(const RefCounts* arr, int size, const char* msg) {
   LOG(INFO) << msg;
   for (int i = 0; i < size; i++) {
-    LOG(INFO) << "s_reg[" << arr[i].s_reg << "]: " << arr[i].count;
+    if ((arr[i].s_reg & STARTING_DOUBLE_SREG) != 0) {
+      LOG(INFO) << "s_reg[D" << (arr[i].s_reg & ~STARTING_DOUBLE_SREG) << "]: " << arr[i].count;
+    } else {
+      LOG(INFO) << "s_reg[" << arr[i].s_reg << "]: " << arr[i].count;
+    }
   }
 }
 
@@ -965,7 +908,7 @@
    * count based on original Dalvik register name.  Count refs
    * separately based on type in order to give allocation
    * preference to fp doubles - which must be allocated sequential
-   * physical single fp registers started with an even-numbered
+   * physical single fp registers starting with an even-numbered
    * reg.
    * TUNING: replace with linear scan once we have the ability
    * to describe register live ranges for GC.
@@ -974,7 +917,7 @@
       static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * num_regs,
                                             ArenaAllocator::kAllocRegAlloc));
   RefCounts *FpRegs =
-      static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs,
+      static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs * 2,
                                              ArenaAllocator::kAllocRegAlloc));
   // Set ssa names for original Dalvik registers
   for (int i = 0; i < dalvik_regs; i++) {
@@ -982,46 +925,49 @@
   }
   // Set ssa name for Method*
   core_regs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();
-  FpRegs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();  // For consistecy
+  FpRegs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();  // For consistency.
+  FpRegs[dalvik_regs + num_regs].s_reg = mir_graph_->GetMethodSReg();  // For consistency.
   // Set ssa names for compiler_temps
   for (int i = 1; i <= cu_->num_compiler_temps; i++) {
     CompilerTemp* ct = mir_graph_->compiler_temps_.Get(i);
     core_regs[dalvik_regs + i].s_reg = ct->s_reg;
     FpRegs[dalvik_regs + i].s_reg = ct->s_reg;
+    FpRegs[num_regs + dalvik_regs + i].s_reg = ct->s_reg;
+  }
+
+  // Duplicate in upper half to represent possible fp double starting sregs.
+  for (int i = 0; i < num_regs; i++) {
+    FpRegs[num_regs + i].s_reg = FpRegs[i].s_reg | STARTING_DOUBLE_SREG;
   }
 
   // Sum use counts of SSA regs by original Dalvik vreg.
-  CountRefs(core_regs, FpRegs);
+  CountRefs(core_regs, FpRegs, num_regs);
 
-  /*
-   * Ideally, we'd allocate doubles starting with an even-numbered
-   * register.  Bias the counts to try to allocate any vreg that's
-   * used as the start of a pair first.
-   */
-  for (int i = 0; i < num_regs; i++) {
-    if (FpRegs[i].double_start) {
-      FpRegs[i].count *= 2;
-    }
-  }
 
   // Sort the count arrays
   qsort(core_regs, num_regs, sizeof(RefCounts), SortCounts);
-  qsort(FpRegs, num_regs, sizeof(RefCounts), SortCounts);
+  qsort(FpRegs, num_regs * 2, sizeof(RefCounts), SortCounts);
 
   if (cu_->verbose) {
     DumpCounts(core_regs, num_regs, "Core regs after sort");
-    DumpCounts(FpRegs, num_regs, "Fp regs after sort");
+    DumpCounts(FpRegs, num_regs * 2, "Fp regs after sort");
   }
 
   if (!(cu_->disable_opt & (1 << kPromoteRegs))) {
     // Promote FpRegs
-    for (int i = 0; (i < num_regs) && (FpRegs[i].count >= promotion_threshold); i++) {
-      int p_map_idx = SRegToPMap(FpRegs[i].s_reg);
-      if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) {
-        int reg = AllocPreservedFPReg(FpRegs[i].s_reg,
-          FpRegs[i].double_start);
+    for (int i = 0; (i < (num_regs * 2)) && (FpRegs[i].count >= promotion_threshold); i++) {
+      int p_map_idx = SRegToPMap(FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG);
+      if ((FpRegs[i].s_reg & STARTING_DOUBLE_SREG) != 0) {
+        if ((promotion_map_[p_map_idx].fp_location != kLocPhysReg) &&
+            (promotion_map_[p_map_idx + 1].fp_location != kLocPhysReg)) {
+          int low_sreg = FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG;
+          // Ignore the result: if we can't allocate a double, we may still be able to allocate singles.
+          AllocPreservedDouble(low_sreg);
+        }
+      } else if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) {
+        int reg = AllocPreservedSingle(FpRegs[i].s_reg);
         if (reg < 0) {
-          break;  // No more left
+          break;  // No more left.
         }
       }
     }
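
A sketch of the doubled fp-counts layout driving the promotion loop above, with fixed toy sizes: the lower half counts single-precision uses per sreg, and the upper half counts the same sregs as potential double starts, tagged with STARTING_DOUBLE_SREG so one sort ranks both kinds together.

    #include <cassert>

    constexpr int kStartingDoubleSreg = 0x10000;  // STARTING_DOUBLE_SREG

    struct RefCounts { int count; int s_reg; };

    int main() {
      const int num_regs = 4;
      RefCounts fp[2 * num_regs] = {};
      for (int i = 0; i < num_regs; ++i) {
        fp[i].s_reg = i;                                   // single-precision entry
        fp[num_regs + i].s_reg = i | kStartingDoubleSreg;  // double-start entry
      }
      fp[num_regs + 2].count += 10;  // a double starting at sreg 2, used 10 times
      assert((fp[num_regs + 2].s_reg & ~kStartingDoubleSreg) == 2);
      return 0;
    }
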
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index e883432..064ff31 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -362,6 +362,7 @@
 }
 
 int X86Mir2Lir::GetInsnSize(LIR* lir) {
+  DCHECK(!IsPseudoLirOp(lir->opcode));
   const X86EncodingMap* entry = &X86Mir2Lir::EncodingMap[lir->opcode];
   switch (entry->kind) {
     case kData:
@@ -1166,7 +1167,7 @@
 
   const bool kVerbosePcFixup = false;
   for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) {
-    if (lir->opcode < 0) {
+    if (IsPseudoLirOp(lir->opcode)) {
       continue;
     }
 
@@ -1174,7 +1175,7 @@
       continue;
     }
 
-    if (lir->flags.pcRelFixup) {
+    if (lir->flags.fixup != kFixupNone) {
       switch (lir->opcode) {
         case kX86Jcc8: {
           LIR *target_lir = lir->target;
@@ -1237,7 +1238,7 @@
           delta = target - pc;
           if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && delta == 0) {
             // Useless branch
-            lir->flags.is_nop = true;
+            NopLIR(lir);
             if (kVerbosePcFixup) {
               LOG(INFO) << "Retry for useless branch at " << lir->offset;
             }
@@ -1385,4 +1386,97 @@
   return res;
 }
 
+// LIR offset assignment.
+// TODO: consolidate w/ Arm assembly mechanism.
+int X86Mir2Lir::AssignInsnOffsets() {
+  LIR* lir;
+  int offset = 0;
+
+  for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) {
+    lir->offset = offset;
+    if (LIKELY(!IsPseudoLirOp(lir->opcode))) {
+      if (!lir->flags.is_nop) {
+        offset += lir->flags.size;
+      }
+    } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) {
+      if (offset & 0x2) {
+        offset += 2;
+        lir->operands[0] = 1;
+      } else {
+        lir->operands[0] = 0;
+      }
+    }
+    /* Pseudo opcodes don't consume space */
+  }
+  return offset;
+}
+
+/*
+ * Walk the compilation unit and assign offsets to instructions
+ * and literals and compute the total size of the compiled unit.
+ * TODO: consolidate w/ Arm assembly mechanism.
+ */
+void X86Mir2Lir::AssignOffsets() {
+  int offset = AssignInsnOffsets();
+
+  /* Const values have to be word aligned */
+  offset = (offset + 3) & ~3;
+
+  /* Set up offsets for literals */
+  data_offset_ = offset;
+
+  offset = AssignLiteralOffset(offset);
+
+  offset = AssignSwitchTablesOffset(offset);
+
+  offset = AssignFillArrayDataOffset(offset);
+
+  total_size_ = offset;
+}
+
+/*
+ * Go over each instruction in the list and calculate the offset from the top
+ * before sending them off to the assembler. If out-of-range branch distance is
+ * seen rearrange the instructions a bit to correct it.
+ * TODO: consolidate w/ Arm assembly mechanism.
+ */
+void X86Mir2Lir::AssembleLIR() {
+  AssignOffsets();
+  int assembler_retries = 0;
+  /*
+   * Assemble here.  Note that we generate code with optimistic assumptions
+   * and if found not to work, we'll have to redo the sequence and retry.
+   */
+
+  while (true) {
+    AssemblerStatus res = AssembleInstructions(0);
+    if (res == kSuccess) {
+      break;
+    } else {
+      assembler_retries++;
+      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
+        CodegenDump();
+        LOG(FATAL) << "Assembler error - too many retries";
+      }
+      // Redo offsets and try again
+      AssignOffsets();
+      code_buffer_.clear();
+    }
+  }
+
+  // Install literals
+  InstallLiteralPools();
+
+  // Install switch tables
+  InstallSwitchTables();
+
+  // Install fill array data
+  InstallFillArrayData();
+
+  // Create the mapping table and native offset to reference map.
+  CreateMappingTables();
+
+  CreateNativeGcMap();
+}
+
 }  // namespace art
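
The new x86 AssembleLIR() follows the optimistic fixed-point pattern noted in the TODOs: assemble under short-encoding assumptions, and when one fails, recompute offsets and rerun up to a retry budget. A control-flow sketch with a hypothetical AssembleOnce:

    #include <cstdio>
    #include <cstdlib>

    enum Status { kSuccess, kRetryAll };
    constexpr int kMaxRetries = 10;  // stand-in for MAX_ASSEMBLER_RETRIES

    // Hypothetical assembler: the first two passes discover out-of-range
    // branches (an optimistic assumption failing) and request a redo.
    static Status AssembleOnce(int attempt) {
      return attempt < 2 ? kRetryAll : kSuccess;
    }

    int main() {
      int retries = 0;
      while (true) {
        if (AssembleOnce(retries) == kSuccess) break;
        if (++retries > kMaxRetries) {
          std::fprintf(stderr, "too many assembler retries\n");
          return EXIT_FAILURE;
        }
        // Redo offsets and clear the output buffer before the next pass.
      }
      std::printf("assembled after %d retries\n", retries);
      return 0;
    }
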
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 2be2aa9..7fad6f0 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -150,43 +150,6 @@
                           rX86_ARG1, true);
 }
 
-void X86Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
-  FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rCX);  // Get obj
-  LockCallTemps();  // Prepare for explicit register usage
-  GenNullCheck(rl_src.s_reg_low, rCX, opt_flags);
-  // If lock is unheld, try to grab it quickly with compare and exchange
-  // TODO: copy and clear hash state?
-  NewLIR2(kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value());
-  NewLIR2(kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT);
-  NewLIR2(kX86Xor32RR, rAX, rAX);
-  NewLIR3(kX86LockCmpxchgMR, rCX, mirror::Object::MonitorOffset().Int32Value(), rDX);
-  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
-  // If lock is held, go the expensive route - artLockObjectFromCode(self, obj);
-  CallRuntimeHelperReg(QUICK_ENTRYPOINT_OFFSET(pLockObject), rCX, true);
-  branch->target = NewLIR0(kPseudoTargetLabel);
-}
-
-void X86Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
-  FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rAX);  // Get obj
-  LockCallTemps();  // Prepare for explicit register usage
-  GenNullCheck(rl_src.s_reg_low, rAX, opt_flags);
-  // If lock is held by the current thread, clear it to quickly release it
-  // TODO: clear hash state?
-  NewLIR2(kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value());
-  NewLIR2(kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT);
-  NewLIR3(kX86Mov32RM, rCX, rAX, mirror::Object::MonitorOffset().Int32Value());
-  OpRegReg(kOpSub, rCX, rDX);
-  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
-  NewLIR3(kX86Mov32MR, rAX, mirror::Object::MonitorOffset().Int32Value(), rCX);
-  LIR* branch2 = NewLIR1(kX86Jmp8, 0);
-  branch->target = NewLIR0(kPseudoTargetLabel);
-  // Otherwise, go the expensive route - UnlockObjectFromCode(obj);
-  CallRuntimeHelperReg(QUICK_ENTRYPOINT_OFFSET(pUnlockObject), rAX, true);
-  branch2->target = NewLIR0(kPseudoTargetLabel);
-}
-
 void X86Mir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset().Int32Value();
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 478654d..c266e39 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -52,7 +52,6 @@
     int AllocTypedTempPair(bool fp_hint, int reg_class);
     int S2d(int low_reg, int high_reg);
     int TargetReg(SpecialTargetRegister reg);
-    RegisterInfo* GetRegInfo(int reg);
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
@@ -72,9 +71,12 @@
     void CompilerInitializeRegAlloc();
 
     // Required for target - miscellaneous.
+    void AssembleLIR();
+    int AssignInsnOffsets();
+    void AssignOffsets();
     AssemblerStatus AssembleInstructions(uintptr_t start_addr);
     void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
-    void SetupTargetResourceMasks(LIR* lir);
+    void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
     const char* GetTargetInstFmt(int opcode);
     const char* GetTargetInstName(int opcode);
     std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
@@ -86,14 +88,12 @@
     // Required for target - Dalvik-level generators.
     void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_src2);
-    void GenArrayObjPut(int opt_flags, RegLocation rl_array,
-                                RegLocation rl_index, RegLocation rl_src, int scale);
     void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_dest, int scale);
     void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale);
+                     RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                   RegLocation rl_src1, RegLocation rl_shift);
+                           RegLocation rl_src1, RegLocation rl_shift);
     void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
@@ -124,8 +124,6 @@
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
     void GenMemBarrier(MemBarrierKind barrier_kind);
-    void GenMonitorEnter(int opt_flags, RegLocation rl_src);
-    void GenMonitorExit(int opt_flags, RegLocation rl_src);
     void GenMoveException(RegLocation rl_dest);
     void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result,
                                                int lit, int first_bit, int second_bit);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 14be7dd..a9f2c59 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -419,7 +419,7 @@
  * Generate array load
  */
 void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_dest, int scale) {
+                             RegLocation rl_index, RegLocation rl_dest, int scale) {
   RegisterClass reg_class = oat_reg_class_by_size(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -466,7 +466,7 @@
  *
  */
 void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale) {
+                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
   RegisterClass reg_class = oat_reg_class_by_size(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -502,59 +502,10 @@
     StoreBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, rl_src.low_reg,
                          rl_src.high_reg, size, INVALID_SREG);
   }
-}
-
-/*
- * Generate array store
- *
- */
-void X86Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale) {
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value();
-
-  FlushAllRegs();  // Use explicit registers
-  LockCallTemps();
-
-  int r_value = TargetReg(kArg0);  // Register holding value
-  int r_array_class = TargetReg(kArg1);  // Register holding array's Class
-  int r_array = TargetReg(kArg2);  // Register holding array
-  int r_index = TargetReg(kArg3);  // Register holding index into array
-
-  LoadValueDirectFixed(rl_array, r_array);  // Grab array
-  LoadValueDirectFixed(rl_src, r_value);  // Grab value
-  LoadValueDirectFixed(rl_index, r_index);  // Grab index
-
-  GenNullCheck(rl_array.s_reg_low, r_array, opt_flags);  // NPE?
-
-  // Store of null?
-  LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL);
-
-  // Get the array's class.
-  LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class);
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCanPutArrayElement), r_value,
-                          r_array_class, true);
-  // Redo LoadValues in case they didn't survive the call.
-  LoadValueDirectFixed(rl_array, r_array);  // Reload array
-  LoadValueDirectFixed(rl_index, r_index);  // Reload index
-  LoadValueDirectFixed(rl_src, r_value);  // Reload value
-  r_array_class = INVALID_REG;
-
-  // Branch here if value to be stored == null
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  null_value_check->target = target;
-
-  // make an extra temp available for card mark below
-  FreeTemp(TargetReg(kArg1));
-  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
-    /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */
-    GenRegMemCheck(kCondUge, r_index, r_array, len_offset, kThrowArrayBounds);
-  }
-  StoreBaseIndexedDisp(r_array, r_index, scale,
-                       data_offset, r_value, INVALID_REG, kWord, INVALID_SREG);
-  FreeTemp(r_index);
-  if (!mir_graph_->IsConstantNullRef(rl_src)) {
-    MarkGCCard(r_value, r_array);
+  if (card_mark) {
+    // Free rl_index if it's a temp. Ensures there are 2 free regs for card mark.
+    FreeTemp(rl_index.low_reg);
+    MarkGCCard(rl_src.low_reg, rl_array.low_reg);
   }
 }
 
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 26accab..0f005da 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -132,37 +132,36 @@
   return 0ULL;
 }
 
-void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir) {
+void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
   DCHECK_EQ(cu_->instruction_set, kX86);
+  DCHECK(!lir->flags.use_def_invalid);
 
   // X86-specific resource map setup here.
-  uint64_t flags = X86Mir2Lir::EncodingMap[lir->opcode].flags;
-
   if (flags & REG_USE_SP) {
-    lir->use_mask |= ENCODE_X86_REG_SP;
+    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
   }
 
   if (flags & REG_DEF_SP) {
-    lir->def_mask |= ENCODE_X86_REG_SP;
+    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
   }
 
   if (flags & REG_DEFA) {
-    SetupRegMask(&lir->def_mask, rAX);
+    SetupRegMask(&lir->u.m.def_mask, rAX);
   }
 
   if (flags & REG_DEFD) {
-    SetupRegMask(&lir->def_mask, rDX);
+    SetupRegMask(&lir->u.m.def_mask, rDX);
   }
   if (flags & REG_USEA) {
-    SetupRegMask(&lir->use_mask, rAX);
+    SetupRegMask(&lir->u.m.use_mask, rAX);
   }
 
   if (flags & REG_USEC) {
-    SetupRegMask(&lir->use_mask, rCX);
+    SetupRegMask(&lir->u.m.use_mask, rCX);
   }
 
   if (flags & REG_USED) {
-    SetupRegMask(&lir->use_mask, rDX);
+    SetupRegMask(&lir->u.m.use_mask, rDX);
   }
 }
 
@@ -275,8 +274,8 @@
     }
     /* Memory bits */
     if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
-      sprintf(buf + strlen(buf), "dr%d%s", x86LIR->alias_info & 0xffff,
-              (x86LIR->alias_info & 0x80000000) ? "(+1)" : "");
+      sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
+              (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
     }
     if (mask & ENCODE_LITERAL) {
       strcat(buf, "lit ");
@@ -375,11 +374,6 @@
   return res;
 }
 
-X86Mir2Lir::RegisterInfo* X86Mir2Lir::GetRegInfo(int reg) {
-  return X86_FPREG(reg) ? &reg_pool_->FPRegs[reg & X86_FP_REG_MASK]
-                    : &reg_pool_->core_regs[reg];
-}
-
 /* To be used when explicitly managing register use */
 void X86Mir2Lir::LockCallTemps() {
   LockTemp(rX86_ARG0);
@@ -530,14 +524,17 @@
 }
 
 uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
   return X86Mir2Lir::EncodingMap[opcode].flags;
 }
 
 const char* X86Mir2Lir::GetTargetInstName(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
   return X86Mir2Lir::EncodingMap[opcode].name;
 }
 
 const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
   return X86Mir2Lir::EncodingMap[opcode].fmt;
 }
 
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index cd1602f..0ca5fd4 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -22,7 +22,7 @@
 namespace art {
 
 void MIRGraph::ClearAllVisitedFlags() {
-  AllNodesIterator iter(this, false /* not iterative */);
+  AllNodesIterator iter(this);
   for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
     bb->visited = false;
   }
@@ -145,11 +145,11 @@
     def_block_matrix_[i] =
         new (arena_) ArenaBitVector(arena_, GetNumBlocks(), false, kBitMapBMatrix);
   }
-  AllNodesIterator iter(this, false /* not iterative */);
+  AllNodesIterator iter(this);
   for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
     FindLocalLiveIn(bb);
   }
-  AllNodesIterator iter2(this, false /* not iterative */);
+  AllNodesIterator iter2(this);
   for (BasicBlock* bb = iter2.Next(); bb != NULL; bb = iter2.Next()) {
     FillDefBlockMatrix(bb);
   }
@@ -179,7 +179,7 @@
   bb->visited = true;
   work_stack.push_back(std::make_pair(bb, new (arena_) ArenaBitVector::Iterator(bb->i_dominated)));
   while (!work_stack.empty()) {
-    std::pair<BasicBlock*, ArenaBitVector::Iterator*> curr = work_stack.back();
+    const std::pair<BasicBlock*, ArenaBitVector::Iterator*>& curr = work_stack.back();
     BasicBlock* curr_bb = curr.first;
     ArenaBitVector::Iterator* curr_idom_iter = curr.second;
     int bb_idx = curr_idom_iter->Next();
@@ -377,7 +377,7 @@
   int num_total_blocks = GetBasicBlockListCount();
 
   /* Initialize domination-related data structures */
-  ReachableNodesIterator iter(this, false /* not iterative */);
+  PreOrderDfsIterator iter(this);
   for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
     InitializeDominationInfo(bb);
   }
@@ -396,7 +396,7 @@
   i_dom_list_[GetEntryBlock()->dfs_id] = GetEntryBlock()->dfs_id;
 
   /* Compute the immediate dominators */
-  ReversePostOrderDfsIterator iter2(this, true /* iterative */);
+  RepeatingReversePostOrderDfsIterator iter2(this);
   bool change = false;
   for (BasicBlock* bb = iter2.Next(false); bb != NULL; bb = iter2.Next(change)) {
     change = ComputeblockIDom(bb);
@@ -414,19 +414,19 @@
   }
   GetEntryBlock()->i_dom = NULL;
 
-  ReachableNodesIterator iter3(this, false /* not iterative */);
+  PreOrderDfsIterator iter3(this);
   for (BasicBlock* bb = iter3.Next(); bb != NULL; bb = iter3.Next()) {
     SetDominators(bb);
   }
 
-  ReversePostOrderDfsIterator iter4(this, false /* not iterative */);
+  ReversePostOrderDfsIterator iter4(this);
   for (BasicBlock* bb = iter4.Next(); bb != NULL; bb = iter4.Next()) {
     ComputeBlockDominators(bb);
   }
 
   // Compute the dominance frontier for each block.
   ComputeDomPostOrderTraversal(GetEntryBlock());
-  PostOrderDOMIterator iter5(this, false /* not iterative */);
+  PostOrderDOMIterator iter5(this);
   for (BasicBlock* bb = iter5.Next(); bb != NULL; bb = iter5.Next()) {
     ComputeDominanceFrontier(bb);
   }
@@ -503,7 +503,7 @@
   temp_dalvik_register_v_ =
       new (arena_) ArenaBitVector(arena_, cu_->num_dalvik_registers, false, kBitMapRegisterV);
 
-  PostOrderDfsIterator iter(this, true /* iterative */);
+  RepeatingPostOrderDfsIterator iter(this);
   bool change = false;
   for (BasicBlock* bb = iter.Next(false); bb != NULL; bb = iter.Next(change)) {
     change = ComputeBlockLiveIns(bb);
@@ -700,7 +700,7 @@
       new (arena_) ArenaBitVector(arena_, GetNumSSARegs(), false, kBitMapTempSSARegisterV);
 
   /* Insert phi-operands with latest SSA names from predecessor blocks */
-  ReachableNodesIterator iter2(this, false /* not iterative */);
+  PreOrderDfsIterator iter2(this);
   for (BasicBlock* bb = iter2.Next(); bb != NULL; bb = iter2.Next()) {
     InsertPhiNodeOperands(bb);
   }
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index 07f37bb..32fac0b 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -29,6 +29,16 @@
   return change;
 }
 
+bool MIRGraph::SetFp(int index) {
+  bool change = false;
+  if (!reg_location_[index].fp) {
+    reg_location_[index].fp = true;
+    reg_location_[index].defined = true;
+    change = true;
+  }
+  return change;
+}
+
 bool MIRGraph::SetCore(int index, bool is_core) {
   bool change = false;
   if (is_core && !reg_location_[index].defined) {
@@ -39,6 +49,16 @@
   return change;
 }
 
+bool MIRGraph::SetCore(int index) {
+  bool change = false;
+  if (!reg_location_[index].defined) {
+    reg_location_[index].core = true;
+    reg_location_[index].defined = true;
+    change = true;
+  }
+  return change;
+}
+
 bool MIRGraph::SetRef(int index, bool is_ref) {
   bool change = false;
   if (is_ref && !reg_location_[index].defined) {
@@ -49,6 +69,16 @@
   return change;
 }
 
+bool MIRGraph::SetRef(int index) {
+  bool change = false;
+  if (!reg_location_[index].defined) {
+    reg_location_[index].ref = true;
+    reg_location_[index].defined = true;
+    change = true;
+  }
+  return change;
+}
+
 bool MIRGraph::SetWide(int index, bool is_wide) {
   bool change = false;
   if (is_wide && !reg_location_[index].wide) {
@@ -58,6 +88,15 @@
   return change;
 }
 
+bool MIRGraph::SetWide(int index) {
+  bool change = false;
+  if (!reg_location_[index].wide) {
+    reg_location_[index].wide = true;
+    change = true;
+  }
+  return change;
+}
+
 bool MIRGraph::SetHigh(int index, bool is_high) {
   bool change = false;
   if (is_high && !reg_location_[index].high_word) {
@@ -67,6 +106,16 @@
   return change;
 }
 
+bool MIRGraph::SetHigh(int index) {
+  bool change = false;
+  if (!reg_location_[index].high_word) {
+    reg_location_[index].high_word = true;
+    change = true;
+  }
+  return change;
+}
+
+
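
The parameterless SetFp/SetCore/SetRef/SetWide/SetHigh variants exist because many call sites always passed a constant true; the bool forms remain for data-flow attributes that can be false. A sketch of the overload pair, reduced to one flag:

    #include <cassert>

    struct Loc { bool core = false; bool defined = false; };

    // New no-arg form: the call site asserts the property unconditionally.
    static bool SetCore(Loc* l) {
      if (!l->defined) { l->core = true; l->defined = true; return true; }
      return false;
    }

    // Old bool form, kept for attributes that may be false.
    static bool SetCore(Loc* l, bool is_core) {
      return is_core ? SetCore(l) : false;
    }

    int main() {
      Loc l;
      assert(SetCore(&l));         // first set reports a change
      assert(!SetCore(&l, true));  // already defined: no further change
      return 0;
    }
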
 /*
  * Infer types and sizes.  We don't need to track change on sizes,
  * as it doesn't propagate.  We're guaranteed at least one pass through
@@ -84,21 +133,23 @@
     SSARepresentation *ssa_rep = mir->ssa_rep;
     if (ssa_rep) {
       int attrs = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+      const int* uses = ssa_rep->uses;
+      const int* defs = ssa_rep->defs;
 
       // Handle defs
       if (attrs & DF_DA) {
         if (attrs & DF_CORE_A) {
-          changed |= SetCore(ssa_rep->defs[0], true);
+          changed |= SetCore(defs[0]);
         }
         if (attrs & DF_REF_A) {
-          changed |= SetRef(ssa_rep->defs[0], true);
+          changed |= SetRef(defs[0]);
         }
         if (attrs & DF_A_WIDE) {
-          reg_location_[ssa_rep->defs[0]].wide = true;
-          reg_location_[ssa_rep->defs[1]].wide = true;
-          reg_location_[ssa_rep->defs[1]].high_word = true;
-          DCHECK_EQ(SRegToVReg(ssa_rep->defs[0])+1,
-          SRegToVReg(ssa_rep->defs[1]));
+          reg_location_[defs[0]].wide = true;
+          reg_location_[defs[1]].wide = true;
+          reg_location_[defs[1]].high_word = true;
+          DCHECK_EQ(SRegToVReg(defs[0])+1,
+          SRegToVReg(defs[1]));
         }
       }
 
@@ -106,17 +157,17 @@
       int next = 0;
       if (attrs & DF_UA) {
         if (attrs & DF_CORE_A) {
-          changed |= SetCore(ssa_rep->uses[next], true);
+          changed |= SetCore(uses[next]);
         }
         if (attrs & DF_REF_A) {
-          changed |= SetRef(ssa_rep->uses[next], true);
+          changed |= SetRef(uses[next]);
         }
         if (attrs & DF_A_WIDE) {
-          reg_location_[ssa_rep->uses[next]].wide = true;
-          reg_location_[ssa_rep->uses[next + 1]].wide = true;
-          reg_location_[ssa_rep->uses[next + 1]].high_word = true;
-          DCHECK_EQ(SRegToVReg(ssa_rep->uses[next])+1,
-          SRegToVReg(ssa_rep->uses[next + 1]));
+          reg_location_[uses[next]].wide = true;
+          reg_location_[uses[next + 1]].wide = true;
+          reg_location_[uses[next + 1]].high_word = true;
+          DCHECK_EQ(SRegToVReg(uses[next])+1, SRegToVReg(uses[next + 1]));
           next += 2;
         } else {
           next++;
@@ -124,17 +175,17 @@
       }
       if (attrs & DF_UB) {
         if (attrs & DF_CORE_B) {
-          changed |= SetCore(ssa_rep->uses[next], true);
+          changed |= SetCore(uses[next]);
         }
         if (attrs & DF_REF_B) {
-          changed |= SetRef(ssa_rep->uses[next], true);
+          changed |= SetRef(uses[next]);
         }
         if (attrs & DF_B_WIDE) {
-          reg_location_[ssa_rep->uses[next]].wide = true;
-          reg_location_[ssa_rep->uses[next + 1]].wide = true;
-          reg_location_[ssa_rep->uses[next + 1]].high_word = true;
-          DCHECK_EQ(SRegToVReg(ssa_rep->uses[next])+1,
-                               SRegToVReg(ssa_rep->uses[next + 1]));
+          reg_location_[uses[next]].wide = true;
+          reg_location_[uses[next + 1]].wide = true;
+          reg_location_[uses[next + 1]].high_word = true;
+          DCHECK_EQ(SRegToVReg(uses[next])+1, SRegToVReg(uses[next + 1]));
           next += 2;
         } else {
           next++;
@@ -142,17 +193,17 @@
       }
       if (attrs & DF_UC) {
         if (attrs & DF_CORE_C) {
-          changed |= SetCore(ssa_rep->uses[next], true);
+          changed |= SetCore(uses[next]);
         }
         if (attrs & DF_REF_C) {
-          changed |= SetRef(ssa_rep->uses[next], true);
+          changed |= SetRef(uses[next]);
         }
         if (attrs & DF_C_WIDE) {
-          reg_location_[ssa_rep->uses[next]].wide = true;
-          reg_location_[ssa_rep->uses[next + 1]].wide = true;
-          reg_location_[ssa_rep->uses[next + 1]].high_word = true;
-          DCHECK_EQ(SRegToVReg(ssa_rep->uses[next])+1,
-          SRegToVReg(ssa_rep->uses[next + 1]));
+          reg_location_[uses[next]].wide = true;
+          reg_location_[uses[next + 1]].wide = true;
+          reg_location_[uses[next + 1]].high_word = true;
+          DCHECK_EQ(SRegToVReg(uses[next])+1, SRegToVReg(uses[next + 1]));
         }
       }
 
@@ -162,27 +213,27 @@
           (mir->dalvikInsn.opcode == Instruction::RETURN_OBJECT)) {
         switch (cu_->shorty[0]) {
             case 'I':
-              changed |= SetCore(ssa_rep->uses[0], true);
+              changed |= SetCore(uses[0]);
               break;
             case 'J':
-              changed |= SetCore(ssa_rep->uses[0], true);
-              changed |= SetCore(ssa_rep->uses[1], true);
-              reg_location_[ssa_rep->uses[0]].wide = true;
-              reg_location_[ssa_rep->uses[1]].wide = true;
-              reg_location_[ssa_rep->uses[1]].high_word = true;
+              changed |= SetCore(uses[0]);
+              changed |= SetCore(uses[1]);
+              reg_location_[uses[0]].wide = true;
+              reg_location_[uses[1]].wide = true;
+              reg_location_[uses[1]].high_word = true;
               break;
             case 'F':
-              changed |= SetFp(ssa_rep->uses[0], true);
+              changed |= SetFp(uses[0]);
               break;
             case 'D':
-              changed |= SetFp(ssa_rep->uses[0], true);
-              changed |= SetFp(ssa_rep->uses[1], true);
-              reg_location_[ssa_rep->uses[0]].wide = true;
-              reg_location_[ssa_rep->uses[1]].wide = true;
-              reg_location_[ssa_rep->uses[1]].high_word = true;
+              changed |= SetFp(uses[0]);
+              changed |= SetFp(uses[1]);
+              reg_location_[uses[0]].wide = true;
+              reg_location_[uses[1]].wide = true;
+              reg_location_[uses[1]].high_word = true;
               break;
             case 'L':
-              changed |= SetRef(ssa_rep->uses[0], true);
+              changed |= SetRef(uses[0]);
               break;
             default: break;
         }
@@ -206,10 +257,10 @@
             SSARepresentation* tgt_rep = move_result_mir->ssa_rep;
             DCHECK(tgt_rep != NULL);
             tgt_rep->fp_def[0] = true;
-            changed |= SetFp(tgt_rep->defs[0], true);
+            changed |= SetFp(tgt_rep->defs[0]);
             if (shorty[0] == 'D') {
               tgt_rep->fp_def[1] = true;
-              changed |= SetFp(tgt_rep->defs[1], true);
+              changed |= SetFp(tgt_rep->defs[1]);
             }
           }
         }
@@ -217,8 +268,8 @@
         // If this is a non-static invoke, mark implicit "this"
         if (((mir->dalvikInsn.opcode != Instruction::INVOKE_STATIC) &&
             (mir->dalvikInsn.opcode != Instruction::INVOKE_STATIC_RANGE))) {
-          reg_location_[ssa_rep->uses[next]].defined = true;
-          reg_location_[ssa_rep->uses[next]].ref = true;
+          reg_location_[uses[next]].defined = true;
+          reg_location_[uses[next]].ref = true;
           next++;
         }
         uint32_t cpos = 1;
@@ -229,28 +280,28 @@
               case 'D':
                 ssa_rep->fp_use[i] = true;
                 ssa_rep->fp_use[i+1] = true;
-                reg_location_[ssa_rep->uses[i]].wide = true;
-                reg_location_[ssa_rep->uses[i+1]].wide = true;
-                reg_location_[ssa_rep->uses[i+1]].high_word = true;
-                DCHECK_EQ(SRegToVReg(ssa_rep->uses[i])+1, SRegToVReg(ssa_rep->uses[i+1]));
+                reg_location_[uses[i]].wide = true;
+                reg_location_[uses[i+1]].wide = true;
+                reg_location_[uses[i+1]].high_word = true;
+                DCHECK_EQ(SRegToVReg(uses[i])+1, SRegToVReg(uses[i+1]));
                 i++;
                 break;
               case 'J':
-                reg_location_[ssa_rep->uses[i]].wide = true;
-                reg_location_[ssa_rep->uses[i+1]].wide = true;
-                reg_location_[ssa_rep->uses[i+1]].high_word = true;
-                DCHECK_EQ(SRegToVReg(ssa_rep->uses[i])+1, SRegToVReg(ssa_rep->uses[i+1]));
-                changed |= SetCore(ssa_rep->uses[i], true);
+                reg_location_[uses[i]].wide = true;
+                reg_location_[uses[i+1]].wide = true;
+                reg_location_[uses[i+1]].high_word = true;
+                DCHECK_EQ(SRegToVReg(uses[i])+1, SRegToVReg(uses[i+1]));
+                changed |= SetCore(uses[i]);
                 i++;
                 break;
               case 'F':
                 ssa_rep->fp_use[i] = true;
                 break;
               case 'L':
-                changed |= SetRef(ssa_rep->uses[i], true);
+                changed |= SetRef(uses[i]);
                 break;
               default:
-                changed |= SetCore(ssa_rep->uses[i], true);
+                changed |= SetCore(uses[i]);
                 break;
             }
             i++;
@@ -260,11 +311,11 @@
 
       for (int i = 0; ssa_rep->fp_use && i < ssa_rep->num_uses; i++) {
         if (ssa_rep->fp_use[i])
-          changed |= SetFp(ssa_rep->uses[i], true);
+          changed |= SetFp(uses[i]);
       }
       for (int i = 0; ssa_rep->fp_def && i < ssa_rep->num_defs; i++) {
         if (ssa_rep->fp_def[i])
-          changed |= SetFp(ssa_rep->defs[i], true);
+          changed |= SetFp(defs[i]);
       }
       // Special-case handling for moves & Phi
       if (attrs & (DF_IS_MOVE | DF_NULL_TRANSFER_N)) {
@@ -276,14 +327,14 @@
          */
         bool is_phi = (static_cast<int>(mir->dalvikInsn.opcode) ==
                       kMirOpPhi);
-        RegLocation rl_temp = reg_location_[ssa_rep->defs[0]];
+        RegLocation rl_temp = reg_location_[defs[0]];
         bool defined_fp = rl_temp.defined && rl_temp.fp;
         bool defined_core = rl_temp.defined && rl_temp.core;
         bool defined_ref = rl_temp.defined && rl_temp.ref;
         bool is_wide = rl_temp.wide || ((attrs & DF_A_WIDE) != 0);
         bool is_high = is_phi && rl_temp.wide && rl_temp.high_word;
         for (int i = 0; i < ssa_rep->num_uses; i++) {
-          rl_temp = reg_location_[ssa_rep->uses[i]];
+          rl_temp = reg_location_[uses[i]];
           defined_fp |= rl_temp.defined && rl_temp.fp;
           defined_core |= rl_temp.defined && rl_temp.core;
           defined_ref |= rl_temp.defined && rl_temp.ref;
@@ -303,26 +354,26 @@
                        << " has both fp and core/ref uses for same def.";
           cu_->disable_opt |= (1 << kPromoteRegs);
         }
-        changed |= SetFp(ssa_rep->defs[0], defined_fp);
-        changed |= SetCore(ssa_rep->defs[0], defined_core);
-        changed |= SetRef(ssa_rep->defs[0], defined_ref);
-        changed |= SetWide(ssa_rep->defs[0], is_wide);
-        changed |= SetHigh(ssa_rep->defs[0], is_high);
+        changed |= SetFp(defs[0], defined_fp);
+        changed |= SetCore(defs[0], defined_core);
+        changed |= SetRef(defs[0], defined_ref);
+        changed |= SetWide(defs[0], is_wide);
+        changed |= SetHigh(defs[0], is_high);
         if (attrs & DF_A_WIDE) {
-          changed |= SetWide(ssa_rep->defs[1], true);
-          changed |= SetHigh(ssa_rep->defs[1], true);
+          changed |= SetWide(defs[1]);
+          changed |= SetHigh(defs[1]);
         }
         for (int i = 0; i < ssa_rep->num_uses; i++) {
-          changed |= SetFp(ssa_rep->uses[i], defined_fp);
-          changed |= SetCore(ssa_rep->uses[i], defined_core);
-          changed |= SetRef(ssa_rep->uses[i], defined_ref);
-          changed |= SetWide(ssa_rep->uses[i], is_wide);
-          changed |= SetHigh(ssa_rep->uses[i], is_high);
+          changed |= SetFp(uses[i], defined_fp);
+          changed |= SetCore(uses[i], defined_core);
+          changed |= SetRef(uses[i], defined_ref);
+          changed |= SetWide(uses[i], is_wide);
+          changed |= SetHigh(uses[i], is_high);
         }
         if (attrs & DF_A_WIDE) {
           DCHECK_EQ(ssa_rep->num_uses, 2);
-          changed |= SetWide(ssa_rep->uses[1], true);
-          changed |= SetHigh(ssa_rep->uses[1], true);
+          changed |= SetWide(uses[1]);
+          changed |= SetHigh(uses[1]);
         }
       }
     }
@@ -444,7 +495,7 @@
   }
 
   /* Do type & size inference pass */
-  PreOrderDfsIterator iter(this, true /* iterative */);
+  RepeatingPreOrderDfsIterator iter(this);
   bool change = false;
   for (BasicBlock* bb = iter.Next(false); bb != NULL; bb = iter.Next(change)) {
     change = InferTypeAndSize(bb);
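
Note on the new single-argument setters above: they serve call sites that previously
passed a constant true, dropping the dead is_core/is_ref test while keeping the
monotonic "set a flag and report whether anything changed" contract that drives the
pass to a fixed point. A minimal standalone sketch of that pattern, with simplified
types rather than the real MIRGraph API:

    #include <cstddef>
    #include <vector>

    struct RegLoc {
      bool core = false;
      bool defined = false;
    };

    struct Graph {
      std::vector<RegLoc> reg_location_;

      // Flags only move from false to true, so repeated passes terminate.
      bool SetCore(int index) {
        bool change = false;
        if (!reg_location_[index].defined) {
          reg_location_[index].core = true;
          reg_location_[index].defined = true;
          change = true;
        }
        return change;
      }

      // Re-run the whole pass until nothing changes, mirroring the
      // RepeatingPreOrderDfsIterator driver in InferTypeAndSize above.
      void InferToFixedPoint() {
        bool change;
        do {
          change = false;
          for (size_t i = 0; i < reg_location_.size(); ++i) {
            change |= SetCore(static_cast<int>(i));
          }
        } while (change);
      }
    };
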
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 6eabeed..7c4a6ce 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -64,7 +64,7 @@
   if (x == 0 && y == 0) {
     return;
   }
-  VLOG(compiler) << Percentage(x, y) << "% of " << str << " for " << (x + y) << " cases";
+  LOG(INFO) << Percentage(x, y) << "% of " << str << " for " << (x + y) << " cases";
 }
 
 class AOTCompilationStats {
@@ -355,7 +355,11 @@
       jni_compiler_(NULL),
       compiler_enable_auto_elf_loading_(NULL),
       compiler_get_method_code_addr_(NULL),
-      support_boot_image_fixup_(true) {
+      support_boot_image_fixup_(true),
+      dedupe_code_("dedupe code"),
+      dedupe_mapping_table_("dedupe mapping table"),
+      dedupe_vmap_table_("dedupe vmap table"),
+      dedupe_gc_map_("dedupe gc map") {
 
   CHECK_PTHREAD_CALL(pthread_key_create, (&tls_key_, NULL), "compiler tls key");
 
@@ -596,12 +600,11 @@
   UpdateImageClasses(timings);
 }
 
-bool CompilerDriver::IsImageClass(const char* descriptor) const {
-  DCHECK(descriptor != NULL);
+bool CompilerDriver::IsImageClass(const StringPiece& descriptor) const {
   if (!IsImage()) {
     return true;
   } else {
-    return image_classes_->find(descriptor) != image_classes_->end();
+    return image_classes_->find(descriptor.data()) != image_classes_->end();
   }
 }
 
@@ -776,7 +779,8 @@
 
 bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file,
                                                       uint32_t type_idx) {
-  if (IsImage() && IsImageClass(dex_file.GetTypeDescriptor(dex_file.GetTypeId(type_idx)))) {
+  if (IsImage() &&
+      IsImageClass(dex_file.StringDataAsStringPieceByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) {
     if (kIsDebugBuild) {
       ScopedObjectAccess soa(Thread::Current());
       mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
@@ -912,9 +916,9 @@
 }
 
 static mirror::ArtMethod* ComputeMethodReferencedFromCompilingMethod(ScopedObjectAccess& soa,
-                                                                          const DexCompilationUnit* mUnit,
-                                                                          uint32_t method_idx,
-                                                                          InvokeType type)
+                                                                     const DexCompilationUnit* mUnit,
+                                                                     uint32_t method_idx,
+                                                                     InvokeType type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::DexCache* dex_cache = mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile());
   mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader());
@@ -923,11 +927,11 @@
 }
 
 bool CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
-                                              int& field_offset, bool& is_volatile, bool is_put) {
+                                              bool is_put, int* field_offset, bool* is_volatile) {
   ScopedObjectAccess soa(Thread::Current());
   // Conservative defaults.
-  field_offset = -1;
-  is_volatile = true;
+  *field_offset = -1;
+  *is_volatile = true;
   // Try to resolve the field; ignore it on an Incompatible Class Change Error (i.e. it is static).
   mirror::ArtField* resolved_field = ComputeFieldReferencedFromCompilingMethod(soa, mUnit, field_idx);
   if (resolved_field != NULL && !resolved_field->IsStatic()) {
@@ -954,8 +958,8 @@
       bool is_write_to_final_from_wrong_class = is_put && resolved_field->IsFinal() &&
           fields_class != referrer_class;
       if (access_ok && !is_write_to_final_from_wrong_class) {
-        field_offset = resolved_field->GetOffset().Int32Value();
-        is_volatile = resolved_field->IsVolatile();
+        *field_offset = resolved_field->GetOffset().Int32Value();
+        *is_volatile = resolved_field->IsVolatile();
         stats_->ResolvedInstanceField();
         return true;  // Fast path.
       }
@@ -970,15 +974,14 @@
 }
 
 bool CompilerDriver::ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
-                                            int& field_offset, int& ssb_index,
-                                            bool& is_referrers_class, bool& is_volatile,
-                                            bool is_put) {
+                                            bool is_put, int* field_offset, int* ssb_index,
+                                            bool* is_referrers_class, bool* is_volatile) {
   ScopedObjectAccess soa(Thread::Current());
   // Conservative defaults.
-  field_offset = -1;
-  ssb_index = -1;
-  is_referrers_class = false;
-  is_volatile = true;
+  *field_offset = -1;
+  *ssb_index = -1;
+  *is_referrers_class = false;
+  *is_volatile = true;
   // Try to resolve the field; ignore it on an Incompatible Class Change Error (i.e. it isn't static).
   mirror::ArtField* resolved_field = ComputeFieldReferencedFromCompilingMethod(soa, mUnit, field_idx);
   if (resolved_field != NULL && resolved_field->IsStatic()) {
@@ -988,9 +991,9 @@
     if (referrer_class != NULL) {
       mirror::Class* fields_class = resolved_field->GetDeclaringClass();
       if (fields_class == referrer_class) {
-        is_referrers_class = true;  // implies no worrying about class initialization
-        field_offset = resolved_field->GetOffset().Int32Value();
-        is_volatile = resolved_field->IsVolatile();
+        *is_referrers_class = true;  // implies no need to worry about class initialization
+        *field_offset = resolved_field->GetOffset().Int32Value();
+        *is_volatile = resolved_field->IsVolatile();
         stats_->ResolvedLocalStaticField();
         return true;  // fast path
       } else {
@@ -1021,9 +1024,9 @@
           if (fields_class->GetDexCache() == dex_cache) {
             // common case where the dex cache of both the referrer and the field are the same,
             // no need to search the dex file
-            ssb_index = fields_class->GetDexTypeIndex();
-            field_offset = resolved_field->GetOffset().Int32Value();
-            is_volatile = resolved_field->IsVolatile();
+            *ssb_index = fields_class->GetDexTypeIndex();
+            *field_offset = resolved_field->GetOffset().Int32Value();
+            *is_volatile = resolved_field->IsVolatile();
             stats_->ResolvedStaticField();
             return true;
           }
@@ -1036,9 +1039,9 @@
                mUnit->GetDexFile()->FindTypeId(mUnit->GetDexFile()->GetIndexForStringId(*string_id));
             if (type_id != NULL) {
               // medium path, needs check of static storage base being initialized
-              ssb_index = mUnit->GetDexFile()->GetIndexForTypeId(*type_id);
-              field_offset = resolved_field->GetOffset().Int32Value();
-              is_volatile = resolved_field->IsVolatile();
+              *ssb_index = mUnit->GetDexFile()->GetIndexForTypeId(*type_id);
+              *field_offset = resolved_field->GetOffset().Int32Value();
+              *is_volatile = resolved_field->IsVolatile();
               stats_->ResolvedStaticField();
               return true;
             }
@@ -1058,15 +1061,15 @@
 void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType type, InvokeType sharp_type,
                                                    mirror::Class* referrer_class,
                                                    mirror::ArtMethod* method,
-                                                   uintptr_t& direct_code,
-                                                   uintptr_t& direct_method,
-                                                   bool update_stats) {
+                                                   bool update_stats,
+                                                   uintptr_t* direct_code,
+                                                   uintptr_t* direct_method) {
   // For direct and static methods compute possible direct_code and direct_method values, ie
   // an address for the Method* being invoked and an address of the code for that Method*.
   // For interface calls compute a value for direct_method that is the interface method being
   // invoked, so this can be passed to the out-of-line runtime support code.
-  direct_code = 0;
-  direct_method = 0;
+  *direct_code = 0;
+  *direct_method = 0;
   if (compiler_backend_ == kPortable) {
     if (sharp_type != kStatic && sharp_type != kDirect) {
       return;
@@ -1095,41 +1098,40 @@
   if (compiling_boot) {
     if (support_boot_image_fixup_) {
       MethodHelper mh(method);
-      if (IsImageClass(mh.GetDeclaringClassDescriptor())) {
+      if (IsImageClass(mh.GetDeclaringClassDescriptorAsStringPiece())) {
         // We can only branch directly to Methods that are resolved in the DexCache.
         // Otherwise we won't invoke the resolution trampoline.
-        direct_method = -1;
-        direct_code = -1;
+        *direct_method = -1;
+        *direct_code = -1;
       }
     }
   } else {
     if (Runtime::Current()->GetHeap()->FindSpaceFromObject(method, false)->IsImageSpace()) {
-      direct_method = reinterpret_cast<uintptr_t>(method);
+      *direct_method = reinterpret_cast<uintptr_t>(method);
     }
-    direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromCompiledCode());
+    *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromCompiledCode());
   }
 }
 
 bool CompilerDriver::ComputeInvokeInfo(const DexCompilationUnit* mUnit, const uint32_t dex_pc,
-                                       InvokeType& invoke_type,
-                                       MethodReference& target_method,
-                                       int& vtable_idx,
-                                       uintptr_t& direct_code, uintptr_t& direct_method,
-                                       bool update_stats) {
+                                       bool update_stats, bool enable_devirtualization,
+                                       InvokeType* invoke_type, MethodReference* target_method,
+                                       int* vtable_idx, uintptr_t* direct_code,
+                                       uintptr_t* direct_method) {
   ScopedObjectAccess soa(Thread::Current());
-  vtable_idx = -1;
-  direct_code = 0;
-  direct_method = 0;
+  *vtable_idx = -1;
+  *direct_code = 0;
+  *direct_method = 0;
   mirror::ArtMethod* resolved_method =
-      ComputeMethodReferencedFromCompilingMethod(soa, mUnit, target_method.dex_method_index,
-                                                 invoke_type);
+      ComputeMethodReferencedFromCompilingMethod(soa, mUnit, target_method->dex_method_index,
+                                                 *invoke_type);
   if (resolved_method != NULL) {
     // Don't try to fast-path if we don't understand the caller's class or this appears to be an
     // Incompatible Class Change Error.
     mirror::Class* referrer_class =
         ComputeCompilingMethodsClass(soa, resolved_method->GetDeclaringClass()->GetDexCache(),
                                      mUnit);
-    bool icce = resolved_method->CheckIncompatibleClassChange(invoke_type);
+    bool icce = resolved_method->CheckIncompatibleClassChange(*invoke_type);
     if (referrer_class != NULL && !icce) {
       mirror::Class* methods_class = resolved_method->GetDeclaringClass();
       if (!referrer_class->CanAccess(methods_class) ||
@@ -1140,42 +1142,42 @@
         // method public. Resort to the dex file to determine the correct class for the access
         // check.
         uint16_t class_idx =
-            target_method.dex_file->GetMethodId(target_method.dex_method_index).class_idx_;
-        methods_class = mUnit->GetClassLinker()->ResolveType(*target_method.dex_file,
+            target_method->dex_file->GetMethodId(target_method->dex_method_index).class_idx_;
+        methods_class = mUnit->GetClassLinker()->ResolveType(*target_method->dex_file,
                                                              class_idx, referrer_class);
       }
       if (referrer_class->CanAccess(methods_class) &&
           referrer_class->CanAccessMember(methods_class, resolved_method->GetAccessFlags())) {
-        const bool kEnableFinalBasedSharpening = true;
+        const bool enableFinalBasedSharpening = enable_devirtualization;
         // Sharpen a virtual call into a direct call when the target is known not to have been
         // overridden (ie is final).
         bool can_sharpen_virtual_based_on_type =
-            (invoke_type == kVirtual) && (resolved_method->IsFinal() || methods_class->IsFinal());
+            (*invoke_type == kVirtual) && (resolved_method->IsFinal() || methods_class->IsFinal());
         // For invoke-super, ensure the vtable index will be correct to dispatch in the vtable of
         // the super class.
-        bool can_sharpen_super_based_on_type = (invoke_type == kSuper) &&
+        bool can_sharpen_super_based_on_type = (*invoke_type == kSuper) &&
             (referrer_class != methods_class) && referrer_class->IsSubClass(methods_class) &&
             resolved_method->GetMethodIndex() < methods_class->GetVTable()->GetLength() &&
             (methods_class->GetVTable()->Get(resolved_method->GetMethodIndex()) == resolved_method);
 
-        if (kEnableFinalBasedSharpening && (can_sharpen_virtual_based_on_type ||
+        if (enableFinalBasedSharpening && (can_sharpen_virtual_based_on_type ||
                                             can_sharpen_super_based_on_type)) {
           // Sharpen a virtual call into a direct call. The method_idx is into referrer's
           // dex cache, check that this resolved method is where we expect it.
-          CHECK(referrer_class->GetDexCache()->GetResolvedMethod(target_method.dex_method_index) ==
+          CHECK(referrer_class->GetDexCache()->GetResolvedMethod(target_method->dex_method_index) ==
                 resolved_method) << PrettyMethod(resolved_method);
           if (update_stats) {
-            stats_->ResolvedMethod(invoke_type);
-            stats_->VirtualMadeDirect(invoke_type);
+            stats_->ResolvedMethod(*invoke_type);
+            stats_->VirtualMadeDirect(*invoke_type);
           }
-          GetCodeAndMethodForDirectCall(invoke_type, kDirect, referrer_class, resolved_method,
-                                        direct_code, direct_method, update_stats);
-          invoke_type = kDirect;
+          GetCodeAndMethodForDirectCall(*invoke_type, kDirect, referrer_class, resolved_method,
+                                        update_stats, direct_code, direct_method);
+          *invoke_type = kDirect;
           return true;
         }
-        const bool kEnableVerifierBasedSharpening = true;
-        if (kEnableVerifierBasedSharpening && (invoke_type == kVirtual ||
-                                               invoke_type == kInterface)) {
+        const bool enableVerifierBasedSharpening = enable_devirtualization;
+        if (enableVerifierBasedSharpening && (*invoke_type == kVirtual ||
+                                              *invoke_type == kInterface)) {
           // Did the verifier record a more precise invoke target based on its type information?
           const MethodReference caller_method(mUnit->GetDexFile(), mUnit->GetDexMethodIndex());
           const MethodReference* devirt_map_target =
@@ -1192,14 +1194,14 @@
                                                        kVirtual);
             CHECK(called_method != NULL);
             CHECK(!called_method->IsAbstract());
-            GetCodeAndMethodForDirectCall(invoke_type, kDirect, referrer_class, called_method,
-                                          direct_code, direct_method, update_stats);
+            GetCodeAndMethodForDirectCall(*invoke_type, kDirect, referrer_class, called_method,
+                                          update_stats, direct_code, direct_method);
             bool compiler_needs_dex_cache =
                 (GetCompilerBackend() == kPortable) ||
                 (GetCompilerBackend() == kQuick && instruction_set_ != kThumb2) ||
-                (direct_code == 0) || (direct_code == static_cast<unsigned int>(-1)) ||
-                (direct_method == 0) || (direct_method == static_cast<unsigned int>(-1));
-            if ((devirt_map_target->dex_file != target_method.dex_file) &&
+                (*direct_code == 0) || (*direct_code == static_cast<unsigned int>(-1)) ||
+                (*direct_method == 0) || (*direct_method == static_cast<unsigned int>(-1));
+            if ((devirt_map_target->dex_file != target_method->dex_file) &&
                 compiler_needs_dex_cache) {
               // We need to use the dex cache to find either the method or code, and the dex file
               // containing the method isn't the one expected for the target method. Try to find
@@ -1209,7 +1211,7 @@
               // TODO: quick only supports direct pointers with Thumb2.
               // TODO: the following should be factored into a common helper routine to find
               //       one dex file's method within another.
-              const DexFile* dexfile = target_method.dex_file;
+              const DexFile* dexfile = target_method->dex_file;
               const DexFile* cm_dexfile =
                   called_method->GetDeclaringClass()->GetDexCache()->GetDexFile();
               const DexFile::MethodId& cm_method_id =
@@ -1225,8 +1227,9 @@
                   if (name != NULL) {
                     uint16_t return_type_idx;
                     std::vector<uint16_t> param_type_idxs;
-                    bool success = dexfile->CreateTypeList(&return_type_idx, &param_type_idxs,
-                                                           cm_dexfile->GetMethodSignature(cm_method_id));
+                    bool success =
+                        dexfile->CreateTypeList(cm_dexfile->GetMethodSignature(cm_method_id).ToString(),
+                                                &return_type_idx, &param_type_idxs);
                     if (success) {
                       const DexFile::ProtoId* sig =
                           dexfile->FindProtoId(return_type_idx, param_type_idxs);
@@ -1235,12 +1238,13 @@
                                                                                     *name, *sig);
                         if (method_id != NULL) {
                           if (update_stats) {
-                            stats_->ResolvedMethod(invoke_type);
-                            stats_->VirtualMadeDirect(invoke_type);
+                            stats_->ResolvedMethod(*invoke_type);
+                            stats_->VirtualMadeDirect(*invoke_type);
                             stats_->PreciseTypeDevirtualization();
                           }
-                          target_method.dex_method_index = dexfile->GetIndexForMethodId(*method_id);
-                          invoke_type = kDirect;
+                          target_method->dex_method_index =
+                              dexfile->GetIndexForMethodId(*method_id);
+                          *invoke_type = kDirect;
                           return true;
                         }
                       }
@@ -1252,28 +1256,28 @@
               //       method in the referring method's dex cache/file.
             } else {
               if (update_stats) {
-                stats_->ResolvedMethod(invoke_type);
-                stats_->VirtualMadeDirect(invoke_type);
+                stats_->ResolvedMethod(*invoke_type);
+                stats_->VirtualMadeDirect(*invoke_type);
                 stats_->PreciseTypeDevirtualization();
               }
-              target_method = *devirt_map_target;
-              invoke_type = kDirect;
+              *target_method = *devirt_map_target;
+              *invoke_type = kDirect;
               return true;
             }
           }
         }
-        if (invoke_type == kSuper) {
+        if (*invoke_type == kSuper) {
           // Unsharpened super calls are suspicious so go slow-path.
         } else {
           // Sharpening failed so generate a regular resolved method dispatch.
           if (update_stats) {
-            stats_->ResolvedMethod(invoke_type);
+            stats_->ResolvedMethod(*invoke_type);
           }
-          if (invoke_type == kVirtual || invoke_type == kSuper) {
-            vtable_idx = resolved_method->GetMethodIndex();
+          if (*invoke_type == kVirtual || *invoke_type == kSuper) {
+            *vtable_idx = resolved_method->GetMethodIndex();
           }
-          GetCodeAndMethodForDirectCall(invoke_type, invoke_type, referrer_class, resolved_method,
-                                        direct_code, direct_method, update_stats);
+          GetCodeAndMethodForDirectCall(*invoke_type, *invoke_type, referrer_class, resolved_method,
+                                        update_stats, direct_code, direct_method);
           return true;
         }
       }
@@ -1284,7 +1288,7 @@
       soa.Self()->ClearException();
   }
   if (update_stats) {
-    stats_->UnresolvedMethod(invoke_type);
+    stats_->UnresolvedMethod(*invoke_type);
   }
   return false;  // Incomplete knowledge needs slow path.
 }
@@ -1569,8 +1573,8 @@
     CHECK(soa.Self()->IsExceptionPending());
     mirror::Throwable* exception = soa.Self()->GetException(NULL);
     VLOG(compiler) << "Exception during type resolution: " << exception->Dump();
-    if (strcmp(ClassHelper(exception->GetClass()).GetDescriptor(),
-               "Ljava/lang/OutOfMemoryError;") == 0) {
+    if (ClassHelper(exception->GetClass()).GetDescriptorAsStringPiece() ==
+        "Ljava/lang/OutOfMemoryError;") {
       // There's little point continuing compilation if the heap is exhausted.
       LOG(FATAL) << "Out of memory during type resolution for compilation";
     }
@@ -1589,13 +1593,11 @@
   if (IsImage()) {
     // For images we resolve all types, such as array, whereas for applications just those with
     // classdefs are resolved by ResolveClassFieldsAndMethods.
-    // TODO: strdup memory leak.
-    timings.NewSplit(strdup(("Resolve " + dex_file.GetLocation() + " Types").c_str()));
+    timings.NewSplit("Resolve Types");
     context.ForAll(0, dex_file.NumTypeIds(), ResolveType, thread_count_);
   }
 
-  // TODO: strdup memory leak.
-  timings.NewSplit(strdup(("Resolve " + dex_file.GetLocation() + " MethodsAndFields").c_str()));
+  timings.NewSplit("Resolve MethodsAndFields");
   context.ForAll(0, dex_file.NumClassDefs(), ResolveClassFieldsAndMethods, thread_count_);
 }
 
@@ -1658,8 +1660,7 @@
 
 void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file,
                                    ThreadPool& thread_pool, base::TimingLogger& timings) {
-  // TODO: strdup memory leak.
-  timings.NewSplit(strdup(("Verify " + dex_file.GetLocation()).c_str()));
+  timings.NewSplit("Verify Dex File");
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, thread_pool);
   context.ForAll(0, dex_file.NumClassDefs(), VerifyClass, thread_count_);
@@ -2084,11 +2085,14 @@
 static void InitializeClass(const ParallelCompilationManager* manager, size_t class_def_index)
     LOCKS_EXCLUDED(Locks::mutator_lock_) {
   ATRACE_CALL();
-  const DexFile::ClassDef& class_def = manager->GetDexFile()->GetClassDef(class_def_index);
+  const DexFile* dex_file = manager->GetDexFile();
+  const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
+  const DexFile::TypeId& class_type_id = dex_file->GetTypeId(class_def.class_idx_);
+  StringPiece descriptor(dex_file->StringDataAsStringPieceByIdx(class_type_id.descriptor_idx_));
+
   ScopedObjectAccess soa(Thread::Current());
   mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(manager->GetClassLoader());
-  const char* descriptor = manager->GetDexFile()->GetClassDescriptor(class_def);
-  mirror::Class* klass = manager->GetClassLinker()->FindClass(descriptor, class_loader);
+  mirror::Class* klass = manager->GetClassLinker()->FindClass(descriptor.data(), class_loader);
   if (klass != NULL) {
     // Only try to initialize classes that were successfully verified.
     if (klass->IsVerified()) {
@@ -2118,7 +2122,7 @@
             bool is_black_listed = StringPiece(descriptor).ends_with("$NoPreloadHolder;");
             if (!is_black_listed) {
               for (size_t i = 0; i < arraysize(class_initializer_black_list); ++i) {
-                if (StringPiece(descriptor) == class_initializer_black_list[i]) {
+                if (descriptor == class_initializer_black_list[i]) {
                   is_black_listed = true;
                   break;
                 }
@@ -2126,7 +2130,7 @@
             }
             if (!is_black_listed) {
               VLOG(compiler) << "Initializing: " << descriptor;
-              if (StringPiece(descriptor) == "Ljava/lang/Void;") {
+              if (descriptor == "Ljava/lang/Void;") {
                 // Hand initialize j.l.Void to avoid Dex file operations in un-started runtime.
                 ObjectLock lock(soa.Self(), klass);
                 mirror::ObjectArray<mirror::ArtField>* fields = klass->GetSFields();
@@ -2157,8 +2161,7 @@
 
 void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file,
                                        ThreadPool& thread_pool, base::TimingLogger& timings) {
-  // TODO: strdup memory leak.
-  timings.NewSplit(strdup(("InitializeNoClinit " + dex_file.GetLocation()).c_str()));
+  timings.NewSplit("InitializeNoClinit");
 #ifndef NDEBUG
   // Sanity check blacklist descriptors.
   if (IsImage()) {
@@ -2265,8 +2268,7 @@
 
 void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file,
                                     ThreadPool& thread_pool, base::TimingLogger& timings) {
-  // TODO: strdup memory leak.
-  timings.NewSplit(strdup(("Compile " + dex_file.GetLocation()).c_str()));
+  timings.NewSplit("Compile Dex File");
   ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this,
                                      &dex_file, thread_pool);
   context.ForAll(0, dex_file.NumClassDefs(), CompilerDriver::CompileClass, thread_count_);
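
Across these hunks the driver's out-parameters move from non-const references to
pointers, with inputs ordered before outputs (is_put and update_stats now lead). A
self-contained mock showing the new calling convention; MockDriver and Example are
illustrative stand-ins, not ART code:

    #include <cstdint>

    struct MockDriver {
      // Mirrors the reworked shape: inputs by value, outputs by pointer.
      bool ComputeInstanceFieldInfo(uint32_t field_idx, const void* m_unit,
                                    bool is_put, int* field_offset,
                                    bool* is_volatile) {
        *field_offset = -1;   // conservative defaults, as in the patch
        *is_volatile = true;
        return false;         // no fast path in this mock
      }
    };

    bool Example(MockDriver* driver, uint32_t field_idx, const void* m_unit) {
      int field_offset;
      bool is_volatile;
      // The &-prefixed arguments make mutation visible at the call site,
      // which the old reference out-parameters hid.
      return driver->ComputeInstanceFieldInfo(field_idx, m_unit, /*is_put=*/ false,
                                              &field_offset, &is_volatile);
    }
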
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 3852acf..7657af5 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -170,22 +170,23 @@
      LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   // Can we fast path instance field access? Computes field's offset and volatility.
-  bool ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
-                                int& field_offset, bool& is_volatile, bool is_put)
+  bool ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put,
+                                int* field_offset, bool* is_volatile)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   // Can we fastpath static field access? Computes field's offset, volatility and whether the
   // field is within the referrer (which can avoid checking class initialization).
-  bool ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
-                              int& field_offset, int& ssb_index,
-                              bool& is_referrers_class, bool& is_volatile, bool is_put)
+  bool ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put,
+                              int* field_offset, int* ssb_index,
+                              bool* is_referrers_class, bool* is_volatile)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   // Can we fastpath an interface, super class or virtual method call? Computes the method's
   // vtable index.
   bool ComputeInvokeInfo(const DexCompilationUnit* mUnit, const uint32_t dex_pc,
-                         InvokeType& type, MethodReference& target_method, int& vtable_idx,
-                         uintptr_t& direct_code, uintptr_t& direct_method, bool update_stats)
+                         bool update_stats, bool enable_devirtualization,
+                         InvokeType* type, MethodReference* target_method, int* vtable_idx,
+                         uintptr_t* direct_code, uintptr_t* direct_method)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   bool IsSafeCast(const MethodReference& mr, uint32_t dex_pc);
@@ -308,7 +309,7 @@
   }
 
   // Checks if class specified by type_idx is one of the image_classes_
-  bool IsImageClass(const char* descriptor) const;
+  bool IsImageClass(const StringPiece& descriptor) const;
 
   void RecordClassStatus(ClassReference ref, mirror::Class::Status status)
       LOCKS_EXCLUDED(compiled_classes_lock_);
@@ -323,8 +324,8 @@
   void GetCodeAndMethodForDirectCall(InvokeType type, InvokeType sharp_type,
                                      mirror::Class* referrer_class,
                                      mirror::ArtMethod* method,
-                                     uintptr_t& direct_code, uintptr_t& direct_method,
-                                     bool update_stats)
+                                     bool update_stats,
+                                     uintptr_t* direct_code, uintptr_t* direct_method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
@@ -458,27 +459,40 @@
   class DedupeHashFunc {
    public:
     size_t operator()(const std::vector<uint8_t>& array) const {
-      // Take a random sample of bytes.
+      // For small arrays compute a hash using every byte.
       static const size_t kSmallArrayThreshold = 16;
-      static const size_t kRandomHashCount = 16;
-      size_t hash = 0;
-      if (array.size() < kSmallArrayThreshold) {
-        for (auto c : array) {
-          hash = hash * 54 + c;
+      size_t hash = 0x811c9dc5;
+      if (array.size() <= kSmallArrayThreshold) {
+        for (uint8_t b : array) {
+          hash = (hash * 16777619) ^ b;
         }
       } else {
-        for (size_t i = 0; i < kRandomHashCount; ++i) {
+        // For larger arrays, use the two bytes at offset 6 (the location of the push-registers
+        // instruction field in quick-generated ARM code) and then sample a number of other
+        // bytes at pseudo-random offsets.
+        static const size_t kRandomHashCount = 16;
+        for (size_t i = 0; i < 2; ++i) {
+          uint8_t b = array[i + 6];
+          hash = (hash * 16777619) ^ b;
+        }
+        for (size_t i = 2; i < kRandomHashCount; ++i) {
           size_t r = i * 1103515245 + 12345;
-          hash = hash * 54 + array[r % array.size()];
+          uint8_t b = array[r % array.size()];
+          hash = (hash * 16777619) ^ b;
         }
       }
+      hash += hash << 13;
+      hash ^= hash >> 7;
+      hash += hash << 3;
+      hash ^= hash >> 17;
+      hash += hash << 5;
       return hash;
     }
   };
-  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc> dedupe_code_;
-  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc> dedupe_mapping_table_;
-  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc> dedupe_vmap_table_;
-  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc> dedupe_gc_map_;
+  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_code_;
+  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_mapping_table_;
+  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_vmap_table_;
+  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_gc_map_;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
 };
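
The replacement hash is FNV-1a with a final avalanche mix, in place of the old
multiply-by-54 scheme whose low bits mixed poorly. A standalone sketch of the
small-array path; the large-array path feeds the same loop with the two bytes at
offset 6 plus pseudo-randomly sampled bytes:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    size_t DedupeHash(const std::vector<uint8_t>& array) {
      size_t hash = 0x811c9dc5;          // FNV-1a 32-bit offset basis.
      for (uint8_t b : array) {
        hash = (hash * 16777619) ^ b;    // FNV-1a 32-bit prime.
      }
      // Final mixing so that similar inputs still spread across buckets.
      hash += hash << 13;
      hash ^= hash >> 7;
      hash += hash << 3;
      hash ^= hash >> 17;
      hash += hash << 5;
      return hash;
    }

The new trailing template argument (4) on the DedupeSet members appears to select
the number of internal shards, reducing lock contention when several compiler
threads dedupe concurrently.
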
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 6464a4c..d4be7c0 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -23,6 +23,8 @@
 #include "compiler/oat_writer.h"
 #include "gc/space/image_space.h"
 #include "image.h"
+#include "lock_word.h"
+#include "mirror/object-inl.h"
 #include "signal_catcher.h"
 #include "UniquePtr.h"
 #include "utils.h"
@@ -158,7 +160,7 @@
       // non image classes should be in a space after the image.
       EXPECT_GT(reinterpret_cast<byte*>(klass), image_end) << descriptor;
     }
-    EXPECT_TRUE(Monitor::IsValidLockWord(*klass->GetRawLockWordAddress()));
+    EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord()));
   }
 }
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index f82c6fb..bcdc1c1 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -241,7 +241,7 @@
 }
 
 bool ImageWriter::IsImageClass(const Class* klass) {
-  return compiler_driver_.IsImageClass(ClassHelper(klass).GetDescriptor());
+  return compiler_driver_.IsImageClass(ClassHelper(klass).GetDescriptorAsStringPiece());
 }
 
 struct NonImageClasses {
@@ -296,7 +296,7 @@
 bool ImageWriter::NonImageClassesVisitor(Class* klass, void* arg) {
   NonImageClasses* context = reinterpret_cast<NonImageClasses*>(arg);
   if (!context->image_writer->IsImageClass(klass)) {
-    context->non_image_classes->insert(ClassHelper(klass).GetDescriptor());
+    context->non_image_classes->insert(ClassHelper(klass).GetDescriptorAsStringPiece().as_string());
   }
   return true;
 }
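
The GetDescriptorAsStringPiece/StringDataAsStringPieceByIdx conversions in this
change avoid strlen/strcmp over descriptors that already live, length-prefixed, in
the dex file. A simplified sketch of why the StringPiece comparison is cheaper;
PieceLite stands in for the real base/stringpiece.h type:

    #include <cstddef>
    #include <cstring>

    struct PieceLite {
      const char* data;
      size_t size;
    };

    // Length check first: mismatched lengths never touch the bytes.
    inline bool operator==(const PieceLite& a, const char* literal) {
      size_t n = strlen(literal);
      return a.size == n && memcmp(a.data, literal, n) == 0;
    }
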
diff --git a/compiler/jni/portable/jni_compiler.cc b/compiler/jni/portable/jni_compiler.cc
index 43408a7..0c14346 100644
--- a/compiler/jni/portable/jni_compiler.cc
+++ b/compiler/jni/portable/jni_compiler.cc
@@ -50,9 +50,9 @@
 using ::art::llvm::runtime_support::RuntimeId;
 
 JniCompiler::JniCompiler(LlvmCompilationUnit* cunit,
-                         CompilerDriver& driver,
+                         CompilerDriver* driver,
                          const DexCompilationUnit* dex_compilation_unit)
-    : cunit_(cunit), driver_(&driver), module_(cunit_->GetModule()),
+    : cunit_(cunit), driver_(driver), module_(cunit_->GetModule()),
       context_(cunit_->GetLLVMContext()), irb_(*cunit_->GetIRBuilder()),
       dex_compilation_unit_(dex_compilation_unit),
       func_(NULL), elf_func_idx_(0) {
diff --git a/compiler/jni/portable/jni_compiler.h b/compiler/jni/portable/jni_compiler.h
index d20c63b..ffabfe6 100644
--- a/compiler/jni/portable/jni_compiler.h
+++ b/compiler/jni/portable/jni_compiler.h
@@ -54,7 +54,7 @@
 class JniCompiler {
  public:
   JniCompiler(LlvmCompilationUnit* cunit,
-              CompilerDriver& driver,
+              CompilerDriver* driver,
               const DexCompilationUnit* dex_compilation_unit);
 
   CompiledMethod* Compile();
@@ -67,7 +67,7 @@
 
  private:
   LlvmCompilationUnit* cunit_;
-  CompilerDriver* driver_;
+  CompilerDriver* const driver_;
 
   ::llvm::Module* module_;
   ::llvm::LLVMContext* context_;
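
Making the member CompilerDriver* const pins the pointer itself, not the pointee:
it cannot be reseated after the constructor runs. A tiny illustration, with Driver
and Holder as stand-ins:

    struct Driver {
      void Mutate() {}
    };

    struct Holder {
      explicit Holder(Driver* d) : driver_(d) {}
      Driver* const driver_;  // 'driver_ = other;' will not compile,
                              // but 'driver_->Mutate();' is still allowed.
    };
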
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 1417fb9..b6b15f9 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -24,7 +24,6 @@
 #include "compiled_method.h"
 #include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
-#include "disassembler.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "jni_internal.h"
 #include "utils/assembler.h"
@@ -85,7 +84,6 @@
 
   // Assembler that holds generated instructions
   UniquePtr<Assembler> jni_asm(Assembler::Create(instruction_set));
-  bool should_disassemble = false;
 
   // Offsets into data structures
   // TODO: if cross compiling these offsets are for the host not the target
@@ -366,10 +364,6 @@
   std::vector<uint8_t> managed_code(cs);
   MemoryRegion code(&managed_code[0], managed_code.size());
   __ FinalizeInstructions(code);
-  if (should_disassemble) {
-    UniquePtr<Disassembler> disassembler(Disassembler::Create(instruction_set));
-    disassembler->Dump(LOG(INFO), &managed_code[0], &managed_code[managed_code.size()]);
-  }
   return new CompiledMethod(compiler,
                             instruction_set,
                             managed_code,
diff --git a/compiler/llvm/compiler_llvm.cc b/compiler/llvm/compiler_llvm.cc
index a917cdc..d59afd4 100644
--- a/compiler/llvm/compiler_llvm.cc
+++ b/compiler/llvm/compiler_llvm.cc
@@ -26,6 +26,7 @@
 #include "ir_builder.h"
 #include "jni/portable/jni_compiler.h"
 #include "llvm_compilation_unit.h"
+#include "thread-inl.h"
 #include "utils_llvm.h"
 #include "verifier/method_verifier.h"
 
@@ -164,7 +165,7 @@
   UniquePtr<LlvmCompilationUnit> cunit(AllocateCompilationUnit());
 
   UniquePtr<JniCompiler> jni_compiler(
-      new JniCompiler(cunit.get(), *compiler_driver_, dex_compilation_unit));
+      new JniCompiler(cunit.get(), compiler_driver_, dex_compilation_unit));
 
   return jni_compiler->Compile();
 }
diff --git a/compiler/llvm/gbc_expander.cc b/compiler/llvm/gbc_expander.cc
index 4f6fa0a..b206a25 100644
--- a/compiler/llvm/gbc_expander.cc
+++ b/compiler/llvm/gbc_expander.cc
@@ -846,10 +846,10 @@
   uintptr_t direct_code = 0;
   uintptr_t direct_method = 0;
   bool is_fast_path = driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_pc,
-                                                 invoke_type, target_method,
-                                                 vtable_idx,
-                                                 direct_code, direct_method,
-                                                 true);
+                                                 true, true,
+                                                 &invoke_type, &target_method,
+                                                 &vtable_idx,
+                                                 &direct_code, &direct_method);
   // Load the method object
   llvm::Value* callee_method_object_addr = NULL;
 
@@ -1630,7 +1630,7 @@
   int field_offset;
   bool is_volatile;
   bool is_fast_path = driver_->ComputeInstanceFieldInfo(
-    field_idx, dex_compilation_unit_, field_offset, is_volatile, false);
+    field_idx, dex_compilation_unit_, false, &field_offset, &is_volatile);
 
   if (!is_fast_path) {
     llvm::Function* runtime_func;
@@ -1692,7 +1692,7 @@
   int field_offset;
   bool is_volatile;
   bool is_fast_path = driver_->ComputeInstanceFieldInfo(
-    field_idx, dex_compilation_unit_, field_offset, is_volatile, true);
+    field_idx, dex_compilation_unit_, true, &field_offset, &is_volatile);
 
   if (!is_fast_path) {
     llvm::Function* runtime_func;
@@ -1897,8 +1897,8 @@
   bool is_volatile;
 
   bool is_fast_path = driver_->ComputeStaticFieldInfo(
-    field_idx, dex_compilation_unit_, field_offset, ssb_index,
-    is_referrers_class, is_volatile, false);
+    field_idx, dex_compilation_unit_, false,
+    &field_offset, &ssb_index, &is_referrers_class, &is_volatile);
 
   llvm::Value* static_field_value;
 
@@ -1981,8 +1981,8 @@
   bool is_volatile;
 
   bool is_fast_path = driver_->ComputeStaticFieldInfo(
-    field_idx, dex_compilation_unit_, field_offset, ssb_index,
-    is_referrers_class, is_volatile, true);
+    field_idx, dex_compilation_unit_, true,
+    &field_offset, &ssb_index, &is_referrers_class, &is_volatile);
 
   if (!is_fast_path) {
     llvm::Function* runtime_func;
diff --git a/compiler/llvm/llvm_compilation_unit.cc b/compiler/llvm/llvm_compilation_unit.cc
index 139100b..feb495e 100644
--- a/compiler/llvm/llvm_compilation_unit.cc
+++ b/compiler/llvm/llvm_compilation_unit.cc
@@ -82,7 +82,6 @@
 #include "ir_builder.h"
 #include "os.h"
 #include "runtime_support_builder_arm.h"
-#include "runtime_support_builder_thumb2.h"
 #include "runtime_support_builder_x86.h"
 #include "utils_llvm.h"
 
@@ -118,12 +117,10 @@
   default:
     runtime_support_.reset(new RuntimeSupportBuilder(*context_, *module_, *irb_));
     break;
+  case kThumb2:
   case kArm:
     runtime_support_.reset(new RuntimeSupportBuilderARM(*context_, *module_, *irb_));
     break;
-  case kThumb2:
-    runtime_support_.reset(new RuntimeSupportBuilderThumb2(*context_, *module_, *irb_));
-    break;
   case kX86:
     runtime_support_.reset(new RuntimeSupportBuilderX86(*context_, *module_, *irb_));
     break;
@@ -214,6 +211,7 @@
   ::llvm::TargetOptions target_options;
   target_options.FloatABIType = ::llvm::FloatABI::Soft;
   target_options.NoFramePointerElim = true;
+  target_options.NoFramePointerElimNonLeaf = true;
   target_options.UseSoftFloat = false;
   target_options.EnableFastISel = false;
 
@@ -257,7 +255,7 @@
 
     ::llvm::OwningPtr< ::llvm::tool_output_file> out_file(
       new ::llvm::tool_output_file(bitcode_filename_.c_str(), errmsg,
-                                 ::llvm::sys::fs::F_Binary));
+                                 ::llvm::raw_fd_ostream::F_Binary));
 
 
     if (!errmsg.empty()) {
@@ -277,6 +275,7 @@
   // pm_builder.Inliner = ::llvm::createAlwaysInlinerPass();
   // pm_builder.Inliner = ::llvm::createPartialInliningPass();
   pm_builder.OptLevel = 3;
+  pm_builder.DisableSimplifyLibCalls = 1;
   pm_builder.DisableUnitAtATime = 1;
   pm_builder.populateFunctionPassManager(fpm);
   pm_builder.populateModulePassManager(pm);
diff --git a/compiler/llvm/runtime_support_builder.cc b/compiler/llvm/runtime_support_builder.cc
index 24e283d..c825fbf 100644
--- a/compiler/llvm/runtime_support_builder.cc
+++ b/compiler/llvm/runtime_support_builder.cc
@@ -164,89 +164,13 @@
 /* Monitor */
 
 void RuntimeSupportBuilder::EmitLockObject(::llvm::Value* object) {
-  Value* monitor =
-      irb_.LoadFromObjectOffset(object,
-                                mirror::Object::MonitorOffset().Int32Value(),
-                                irb_.getJIntTy(),
-                                kTBAARuntimeInfo);
-
-  Value* real_monitor =
-      irb_.CreateAnd(monitor, ~(LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
-
-  // Is thin lock, unheld and not recursively acquired.
-  Value* unheld = irb_.CreateICmpEQ(real_monitor, irb_.getInt32(0));
-
-  Function* parent_func = irb_.GetInsertBlock()->getParent();
-  BasicBlock* bb_fast = BasicBlock::Create(context_, "lock_fast", parent_func);
-  BasicBlock* bb_slow = BasicBlock::Create(context_, "lock_slow", parent_func);
-  BasicBlock* bb_cont = BasicBlock::Create(context_, "lock_cont", parent_func);
-  irb_.CreateCondBr(unheld, bb_fast, bb_slow, kLikely);
-
-  irb_.SetInsertPoint(bb_fast);
-
-  // Calculate new monitor: new = old | (lock_id << LW_LOCK_OWNER_SHIFT)
-  Value* lock_id =
-      EmitLoadFromThreadOffset(Thread::ThinLockIdOffset().Int32Value(),
-                               irb_.getInt32Ty(), kTBAARuntimeInfo);
-
-  Value* owner = irb_.CreateShl(lock_id, LW_LOCK_OWNER_SHIFT);
-  Value* new_monitor = irb_.CreateOr(monitor, owner);
-
-  // Atomically update monitor.
-  Value* old_monitor =
-      irb_.CompareExchangeObjectOffset(object,
-                                       mirror::Object::MonitorOffset().Int32Value(),
-                                       monitor, new_monitor, kTBAARuntimeInfo);
-
-  Value* retry_slow_path = irb_.CreateICmpEQ(old_monitor, monitor);
-  irb_.CreateCondBr(retry_slow_path, bb_cont, bb_slow, kLikely);
-
-  irb_.SetInsertPoint(bb_slow);
   Function* slow_func = GetRuntimeSupportFunction(runtime_support::LockObject);
   irb_.CreateCall2(slow_func, object, EmitGetCurrentThread());
-  irb_.CreateBr(bb_cont);
-
-  irb_.SetInsertPoint(bb_cont);
 }
 
 void RuntimeSupportBuilder::EmitUnlockObject(::llvm::Value* object) {
-  Value* lock_id =
-      EmitLoadFromThreadOffset(Thread::ThinLockIdOffset().Int32Value(),
-                               irb_.getJIntTy(),
-                               kTBAARuntimeInfo);
-  Value* monitor =
-      irb_.LoadFromObjectOffset(object,
-                                mirror::Object::MonitorOffset().Int32Value(),
-                                irb_.getJIntTy(),
-                                kTBAARuntimeInfo);
-
-  Value* my_monitor = irb_.CreateShl(lock_id, LW_LOCK_OWNER_SHIFT);
-  Value* hash_state = irb_.CreateAnd(monitor, (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
-  Value* real_monitor = irb_.CreateAnd(monitor, ~(LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
-
-  // Is thin lock, held by us and not recursively acquired
-  Value* is_fast_path = irb_.CreateICmpEQ(real_monitor, my_monitor);
-
-  Function* parent_func = irb_.GetInsertBlock()->getParent();
-  BasicBlock* bb_fast = BasicBlock::Create(context_, "unlock_fast", parent_func);
-  BasicBlock* bb_slow = BasicBlock::Create(context_, "unlock_slow", parent_func);
-  BasicBlock* bb_cont = BasicBlock::Create(context_, "unlock_cont", parent_func);
-  irb_.CreateCondBr(is_fast_path, bb_fast, bb_slow, kLikely);
-
-  irb_.SetInsertPoint(bb_fast);
-  // Set all bits to zero (except hash state)
-  irb_.StoreToObjectOffset(object,
-                           mirror::Object::MonitorOffset().Int32Value(),
-                           hash_state,
-                           kTBAARuntimeInfo);
-  irb_.CreateBr(bb_cont);
-
-  irb_.SetInsertPoint(bb_slow);
   Function* slow_func = GetRuntimeSupportFunction(runtime_support::UnlockObject);
   irb_.CreateCall2(slow_func, object, EmitGetCurrentThread());
-  irb_.CreateBr(bb_cont);
-
-  irb_.SetInsertPoint(bb_cont);
 }
 
 
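With the inline thin-lock fast path deleted here (and the ARM/Thumb2
specializations removed below), portable monitor-enter and monitor-exit reduce to
one out-of-line call each; the runtime helpers now own the fast/slow decision and
any required memory barriers, so the explicit "dmb sy" emission also goes away.
The whole of EmitLockObject after this change is exactly the two lines the hunk
keeps:

    void RuntimeSupportBuilder::EmitLockObject(::llvm::Value* object) {
      Function* slow_func = GetRuntimeSupportFunction(runtime_support::LockObject);
      irb_.CreateCall2(slow_func, object, EmitGetCurrentThread());
    }
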
diff --git a/compiler/llvm/runtime_support_builder.h b/compiler/llvm/runtime_support_builder.h
index e92ac0a..898611a 100644
--- a/compiler/llvm/runtime_support_builder.h
+++ b/compiler/llvm/runtime_support_builder.h
@@ -64,8 +64,8 @@
   virtual void EmitTestSuspend();
 
   /* Monitor */
-  virtual void EmitLockObject(::llvm::Value* object);
-  virtual void EmitUnlockObject(::llvm::Value* object);
+  void EmitLockObject(::llvm::Value* object);
+  void EmitUnlockObject(::llvm::Value* object);
 
   /* MarkGCCard */
   virtual void EmitMarkGCCard(::llvm::Value* value, ::llvm::Value* target_addr);
diff --git a/compiler/llvm/runtime_support_builder_arm.cc b/compiler/llvm/runtime_support_builder_arm.cc
index 569d825..cad4624 100644
--- a/compiler/llvm/runtime_support_builder_arm.cc
+++ b/compiler/llvm/runtime_support_builder_arm.cc
@@ -116,24 +116,5 @@
   return old_thread_register;
 }
 
-
-/* Monitor */
-
-void RuntimeSupportBuilderARM::EmitLockObject(Value* object) {
-  RuntimeSupportBuilder::EmitLockObject(object);
-  FunctionType* func_ty = FunctionType::get(/*Result=*/Type::getVoidTy(context_),
-                                            /*isVarArg=*/false);
-  InlineAsm* func = InlineAsm::get(func_ty, "dmb sy", "", true);
-  irb_.CreateCall(func);
-}
-
-void RuntimeSupportBuilderARM::EmitUnlockObject(Value* object) {
-  RuntimeSupportBuilder::EmitUnlockObject(object);
-  FunctionType* func_ty = FunctionType::get(/*Result=*/Type::getVoidTy(context_),
-                                            /*isVarArg=*/false);
-  InlineAsm* func = InlineAsm::get(func_ty, "dmb sy", "", true);
-  irb_.CreateCall(func);
-}
-
 }  // namespace llvm
 }  // namespace art
diff --git a/compiler/llvm/runtime_support_builder_arm.h b/compiler/llvm/runtime_support_builder_arm.h
index 5a353d7..0d01509 100644
--- a/compiler/llvm/runtime_support_builder_arm.h
+++ b/compiler/llvm/runtime_support_builder_arm.h
@@ -34,10 +34,6 @@
   virtual void EmitStoreToThreadOffset(int64_t offset, ::llvm::Value* value,
                                        TBAASpecialType s_ty);
   virtual ::llvm::Value* EmitSetCurrentThread(::llvm::Value* thread);
-
-  /* Monitor */
-  virtual void EmitLockObject(::llvm::Value* object);
-  virtual void EmitUnlockObject(::llvm::Value* object);
 };
 
 }  // namespace llvm
diff --git a/compiler/llvm/runtime_support_builder_thumb2.cc b/compiler/llvm/runtime_support_builder_thumb2.cc
deleted file mode 100644
index eff29c8..0000000
--- a/compiler/llvm/runtime_support_builder_thumb2.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "runtime_support_builder_thumb2.h"
-
-#include "ir_builder.h"
-#include "mirror/object.h"
-#include "monitor.h"
-#include "thread.h"
-#include "utils_llvm.h"
-
-#include <llvm/IR/DerivedTypes.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/InlineAsm.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/Type.h>
-
-#include <inttypes.h>
-#include <vector>
-
-using ::llvm::BasicBlock;
-using ::llvm::Function;
-using ::llvm::FunctionType;
-using ::llvm::InlineAsm;
-using ::llvm::Type;
-using ::llvm::Value;
-
-namespace art {
-namespace llvm {
-
-
-void RuntimeSupportBuilderThumb2::EmitLockObject(Value* object) {
-  FunctionType* func_ty = FunctionType::get(/*Result=*/irb_.getInt32Ty(),
-                                            /*Params=*/irb_.getJObjectTy(),
-                                            /*isVarArg=*/false);
-  // $0: result
-  // $1: object
-  // $2: temp
-  // $3: temp
-  std::string asms;
-  StringAppendF(&asms, "add $3, $1, #%" PRId32 "\n", mirror::Object::MonitorOffset().Int32Value());
-  StringAppendF(&asms, "ldr $2, [r9, #%" PRId32 "]\n", Thread::ThinLockIdOffset().Int32Value());
-  StringAppendF(&asms, "ldrex $0, [$3]\n");
-  StringAppendF(&asms, "lsl $2, $2, %d\n", LW_LOCK_OWNER_SHIFT);
-  StringAppendF(&asms, "bfi $2, $0, #0, #%d\n", LW_LOCK_OWNER_SHIFT - 1);
-  StringAppendF(&asms, "bfc $0, #%d, #%d\n", LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1);
-  StringAppendF(&asms, "cmp $0, #0\n");
-  StringAppendF(&asms, "it eq\n");
-  StringAppendF(&asms, "strexeq $0, $2, [$3]\n");
-
-  InlineAsm* func = InlineAsm::get(func_ty, asms, "=&l,l,~l,~l", true);
-
-  Value* retry_slow_path = irb_.CreateCall(func, object);
-  retry_slow_path = irb_.CreateICmpNE(retry_slow_path, irb_.getJInt(0));
-
-  Function* parent_func = irb_.GetInsertBlock()->getParent();
-  BasicBlock* basic_block_lock = BasicBlock::Create(context_, "lock", parent_func);
-  BasicBlock* basic_block_cont = BasicBlock::Create(context_, "lock_cont", parent_func);
-  irb_.CreateCondBr(retry_slow_path, basic_block_lock, basic_block_cont, kUnlikely);
-
-  irb_.SetInsertPoint(basic_block_lock);
-  Function* slow_func = GetRuntimeSupportFunction(runtime_support::LockObject);
-  irb_.CreateCall2(slow_func, object, EmitGetCurrentThread());
-  irb_.CreateBr(basic_block_cont);
-
-  irb_.SetInsertPoint(basic_block_cont);
-  {  // Memory barrier
-    FunctionType* asm_ty = FunctionType::get(/*Result=*/Type::getVoidTy(context_),
-                                              /*isVarArg=*/false);
-    InlineAsm* func = InlineAsm::get(asm_ty, "dmb sy", "", true);
-    irb_.CreateCall(func);
-  }
-}
-
-
-}  // namespace llvm
-}  // namespace art
diff --git a/compiler/llvm/runtime_support_builder_thumb2.h b/compiler/llvm/runtime_support_builder_thumb2.h
deleted file mode 100644
index c47a274..0000000
--- a/compiler/llvm/runtime_support_builder_thumb2.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_LLVM_RUNTIME_SUPPORT_BUILDER_THUMB2_H_
-#define ART_COMPILER_LLVM_RUNTIME_SUPPORT_BUILDER_THUMB2_H_
-
-#include "runtime_support_builder_arm.h"
-
-namespace art {
-namespace llvm {
-
-class RuntimeSupportBuilderThumb2 : public RuntimeSupportBuilderARM {
- public:
-  RuntimeSupportBuilderThumb2(::llvm::LLVMContext& context, ::llvm::Module& module, IRBuilder& irb)
-    : RuntimeSupportBuilderARM(context, module, irb) {}
-
-  /* Monitor */
-  virtual void EmitLockObject(::llvm::Value* object);
-};
-
-}  // namespace llvm
-}  // namespace art
-
-#endif  // ART_COMPILER_LLVM_RUNTIME_SUPPORT_BUILDER_THUMB2_H_
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index bfba9c0..6ac5d6a 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -104,7 +104,7 @@
   ASSERT_TRUE(oat_file.get() != NULL);
   const OatHeader& oat_header = oat_file->GetOatHeader();
   ASSERT_TRUE(oat_header.IsValid());
-  ASSERT_EQ(2U, oat_header.GetDexFileCount());  // core and conscrypt
+  ASSERT_EQ(1U, oat_header.GetDexFileCount());  // core
   ASSERT_EQ(42U, oat_header.GetImageFileLocationOatChecksum());
   ASSERT_EQ(4096U, oat_header.GetImageFileLocationOatDataBegin());
   ASSERT_EQ("lue.art", oat_header.GetImageFileLocation());
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index f9d6e41..f23b72b 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -667,7 +667,7 @@
   const CompiledMethod* compiled_method =
       compiler_driver_->GetCompiledMethod(MethodReference(&dex_file, method_idx));
 
-  OatMethodOffsets method_offsets =
+  const OatMethodOffsets& method_offsets =
       oat_classes_[oat_class_index]->method_offsets_[class_def_method_index];
 
 
diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h
index f3d35d7..53c1afa 100644
--- a/compiler/utils/dedupe_set.h
+++ b/compiler/utils/dedupe_set.h
@@ -18,62 +18,65 @@
 #define ART_COMPILER_UTILS_DEDUPE_SET_H_
 
 #include <set>
+#include <string>
 
 #include "base/mutex.h"
 #include "base/stl_util.h"
 
 namespace art {
 
-// A simple data structure to handle hashed deduplication. Add is thread safe.
-template <typename Key, typename HashType, typename HashFunc>
+// A set of Keys that supports a HashFunc returning HashType, used to find duplicates of Key in
+// the Add method. The data structure is thread-safe through the use of internal locks, and the
+// locking can be sharded across kShard bins.
+template <typename Key, typename HashType, typename HashFunc, HashType kShard = 1>
 class DedupeSet {
   typedef std::pair<HashType, Key*> HashedKey;
 
   class Comparator {
    public:
     bool operator()(const HashedKey& a, const HashedKey& b) const {
-      if (a.first < b.first) return true;
-      if (a.first > b.first) return true;
-      return *a.second < *b.second;
+      if (a.first != b.first) {
+        return a.first < b.first;
+      } else {
+        return *a.second < *b.second;
+      }
     }
   };
 
-  typedef std::set<HashedKey, Comparator> Keys;
-
  public:
-  typedef typename Keys::iterator iterator;
-  typedef typename Keys::const_iterator const_iterator;
-  typedef typename Keys::size_type size_type;
-  typedef typename Keys::value_type value_type;
-
-  iterator begin() { return keys_.begin(); }
-  const_iterator begin() const { return keys_.begin(); }
-  iterator end() { return keys_.end(); }
-  const_iterator end() const { return keys_.end(); }
-
   Key* Add(Thread* self, const Key& key) {
-    HashType hash = HashFunc()(key);
-    HashedKey hashed_key(hash, const_cast<Key*>(&key));
-    MutexLock lock(self, lock_);
-    auto it = keys_.find(hashed_key);
-    if (it != keys_.end()) {
+    HashType raw_hash = HashFunc()(key);
+    HashType shard_hash = raw_hash / kShard;
+    HashType shard_bin = raw_hash % kShard;
+    HashedKey hashed_key(shard_hash, const_cast<Key*>(&key));
+    MutexLock lock(self, *lock_[shard_bin]);
+    auto it = keys_[shard_bin].find(hashed_key);
+    if (it != keys_[shard_bin].end()) {
       return it->second;
     }
     hashed_key.second = new Key(key);
-    keys_.insert(hashed_key);
+    keys_[shard_bin].insert(hashed_key);
     return hashed_key.second;
   }
 
-  DedupeSet() : lock_("dedupe lock") {
+  explicit DedupeSet(const char* set_name) {
+    for (HashType i = 0; i < kShard; ++i) {
+      lock_name_[i] = StringPrintf("%s lock %d", set_name, static_cast<int>(i));
+      lock_[i].reset(new Mutex(lock_name_[i].c_str()));
+    }
   }
 
   ~DedupeSet() {
-    STLDeleteValues(&keys_);
+    for (HashType i = 0; i < kShard; ++i) {
+      STLDeleteValues(&keys_[i]);
+    }
   }
 
  private:
-  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  Keys keys_;
+  std::string lock_name_[kShard];
+  UniquePtr<Mutex> lock_[kShard];
+  std::set<HashedKey, Comparator> keys_[kShard];
+
   DISALLOW_COPY_AND_ASSIGN(DedupeSet);
 };
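
Two things change in DedupeSet: the comparator bug is fixed (the old code returned true for both a.first < b.first and a.first > b.first, violating strict weak ordering), and the set is sharded so that kShard independent lock/set pairs reduce contention. Add() picks a bin with raw_hash % kShard and stores raw_hash / kShard within that bin. A hedged usage sketch, assuming the DedupeHashFunc from the test below:

    // Four-way sharded set; each Add() locks only one of the four internal mutexes.
    DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe("byte arrays");
    std::vector<uint8_t> code = {1, 2, 3};
    std::vector<uint8_t>* canonical = dedupe.Add(Thread::Current(), code);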
 
diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc
index 9f5e292..03d8b96 100644
--- a/compiler/utils/dedupe_set_test.cc
+++ b/compiler/utils/dedupe_set_test.cc
@@ -38,7 +38,7 @@
 TEST_F(DedupeSetTest, Test) {
   Thread* self = Thread::Current();
   typedef std::vector<uint8_t> ByteArray;
-  DedupeSet<ByteArray, size_t, DedupeHashFunc> deduplicator;
+  DedupeSet<ByteArray, size_t, DedupeHashFunc> deduplicator("test");
   ByteArray* array1;
   {
     ByteArray test1;
diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk
index 52584cf..a046391 100644
--- a/dalvikvm/Android.mk
+++ b/dalvikvm/Android.mk
@@ -38,7 +38,7 @@
 LOCAL_SRC_FILES := dalvikvm.cc
 LOCAL_CFLAGS := $(dalvikvm_cflags)
 LOCAL_SHARED_LIBRARIES := libnativehelper
-LOCAL_LDFLAGS := -ldl
+LOCAL_LDFLAGS := -ldl -lpthread
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 include $(BUILD_HOST_EXECUTABLE)
 ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index c4cce2f..d5d1303 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -604,7 +604,7 @@
 #error "Unsupported architecture"
 #endif
   bool is_host = false;
-  bool dump_stats = kIsDebugBuild;
+  bool dump_stats = false;
   bool dump_timing = false;
   bool dump_slow_timing = kIsDebugBuild;
   bool watch_dog_enabled = !kIsTargetBuild;
@@ -696,6 +696,8 @@
       runtime_args.push_back(argv[i]);
     } else if (option == "--dump-timing") {
       dump_timing = true;
+    } else if (option == "--dump-stats") {
+      dump_stats = true;
     } else {
       Usage("Unknown argument %s", option.data());
     }
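
dump_stats now defaults to false even on debug builds; the new --dump-stats flag turns the compiler statistics dump back on explicitly, so an invocation can append --dump-stats alongside the existing --dump-timing when the numbers are wanted.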
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
new file mode 100644
index 0000000..f8001a4
--- /dev/null
+++ b/disassembler/Android.mk
@@ -0,0 +1,120 @@
+#
+# Copyright (C) 2012 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+
+include art/build/Android.common.mk
+
+LIBART_DISASSEMBLER_SRC_FILES := \
+	disassembler.cc \
+	disassembler_arm.cc \
+	disassembler_mips.cc \
+	disassembler_x86.cc
+
+# $(1): target or host
+# $(2): ndebug or debug
+define build-libart-disassembler
+  ifneq ($(1),target)
+    ifneq ($(1),host)
+      $$(error expected target or host for argument 1, received $(1))
+    endif
+  endif
+  ifneq ($(2),ndebug)
+    ifneq ($(2),debug)
+      $$(error expected ndebug or debug for argument 2, received $(2))
+    endif
+  endif
+
+  art_target_or_host := $(1)
+  art_ndebug_or_debug := $(2)
+
+  include $(CLEAR_VARS)
+  ifeq ($$(art_target_or_host),target)
+    include external/stlport/libstlport.mk
+  else
+    LOCAL_IS_HOST_MODULE := true
+  endif
+  LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+  ifeq ($$(art_ndebug_or_debug),ndebug)
+    LOCAL_MODULE := libart-disassembler
+  else # debug
+    LOCAL_MODULE := libartd-disassembler
+  endif
+
+  LOCAL_MODULE_TAGS := optional
+  LOCAL_MODULE_CLASS := SHARED_LIBRARIES
+
+  LOCAL_SRC_FILES := $$(LIBART_DISASSEMBLER_SRC_FILES)
+
+  GENERATED_SRC_DIR := $$(call intermediates-dir-for,$$(LOCAL_MODULE_CLASS),$$(LOCAL_MODULE),$$(LOCAL_IS_HOST_MODULE),)
+
+  ifeq ($$(art_target_or_host),target)
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
+    LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
+  else # host
+    LOCAL_CLANG := $(ART_HOST_CLANG)
+    LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
+  endif
+
+  LOCAL_SHARED_LIBRARIES += liblog
+  ifeq ($$(art_ndebug_or_debug),debug)
+    ifeq ($$(art_target_or_host),target)
+      LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS)
+    else # host
+      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+    endif
+    LOCAL_SHARED_LIBRARIES += libartd
+  else
+    ifeq ($$(art_target_or_host),target)
+      LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS)
+    else # host
+      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+    endif
+    LOCAL_SHARED_LIBRARIES += libart
+  endif
+
+  LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
+
+  LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
+  LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
+  ifeq ($$(art_target_or_host),target)
+    LOCAL_SHARED_LIBRARIES += libcutils
+    include $(LLVM_GEN_INTRINSICS_MK)
+    include $(LLVM_DEVICE_BUILD_MK)
+    include $(BUILD_SHARED_LIBRARY)
+  else # host
+    LOCAL_STATIC_LIBRARIES += libcutils
+    include $(LLVM_GEN_INTRINSICS_MK)
+    include $(LLVM_HOST_BUILD_MK)
+    include $(BUILD_HOST_SHARED_LIBRARY)
+  endif
+endef
+
+ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
+  $(eval $(call build-libart-disassembler,target,ndebug))
+endif
+ifeq ($(ART_BUILD_TARGET_DEBUG),true)
+  $(eval $(call build-libart-disassembler,target,debug))
+endif
+ifeq ($(WITH_HOST_DALVIK),true)
+  # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
+  ifeq ($(ART_BUILD_NDEBUG),true)
+    $(eval $(call build-libart-disassembler,host,ndebug))
+  endif
+  ifeq ($(ART_BUILD_DEBUG),true)
+    $(eval $(call build-libart-disassembler,host,debug))
+  endif
+endif
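
The build-libart-disassembler template is instantiated over the two axes validated at its top (target/host × ndebug/debug), producing libart-disassembler and libartd-disassembler linked against libart and libartd respectively; the oatdump rules below switch over to these libraries now that the disassembler sources have moved out of the runtime.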
diff --git a/runtime/disassembler.cc b/disassembler/disassembler.cc
similarity index 100%
rename from runtime/disassembler.cc
rename to disassembler/disassembler.cc
diff --git a/runtime/disassembler.h b/disassembler/disassembler.h
similarity index 90%
rename from runtime/disassembler.h
rename to disassembler/disassembler.h
index 805ff4d..7547ab7 100644
--- a/runtime/disassembler.h
+++ b/disassembler/disassembler.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_DISASSEMBLER_H_
-#define ART_RUNTIME_DISASSEMBLER_H_
+#ifndef ART_DISASSEMBLER_DISASSEMBLER_H_
+#define ART_DISASSEMBLER_DISASSEMBLER_H_
 
 #include <stdint.h>
 
@@ -45,4 +45,4 @@
 
 }  // namespace art
 
-#endif  // ART_RUNTIME_DISASSEMBLER_H_
+#endif  // ART_DISASSEMBLER_DISASSEMBLER_H_
diff --git a/runtime/disassembler_arm.cc b/disassembler/disassembler_arm.cc
similarity index 98%
rename from runtime/disassembler_arm.cc
rename to disassembler/disassembler_arm.cc
index 879d3ac..782c1f3 100644
--- a/runtime/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -278,6 +278,26 @@
     os << StringPrintf("%p: %08x\t%-7s ", instr_ptr, instruction, opcode.c_str()) << args.str() << '\n';
 }
 
+int32_t ThumbExpand(int32_t imm12) {
+  if ((imm12 & 0xC00) == 0) {
+    switch ((imm12 >> 8) & 3) {
+      case 0:
+        return imm12 & 0xFF;
+      case 1:
+        return ((imm12 & 0xFF) << 16) | (imm12 & 0xFF);
+      case 2:
+        return ((imm12 & 0xFF) << 24) | ((imm12 & 0xFF) << 8);
+      default:  // 3
+        return ((imm12 & 0xFF) << 24) | ((imm12 & 0xFF) << 16) | ((imm12 & 0xFF) << 8) |
+            (imm12 & 0xFF);
+    }
+  } else {
+    uint32_t val = 0x80 | (imm12 & 0x7F);
+    int32_t rotate = (imm12 >> 7) & 0x1F;
+    return (val >> rotate) | (val << (32 - rotate));
+  }
+}
+
 size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) {
   uint32_t instr = (ReadU16(instr_ptr) << 16) | ReadU16(instr_ptr + 2);
   // |111|1 1|1000000|0000|1111110000000000|
@@ -628,7 +648,7 @@
               opcode << "s";
             }
           }
-          args << Rd << ", ThumbExpand(" << imm32 << ")";
+          args << Rd << ", #" << ThumbExpand(imm32);
         } else if (Rd.r == 0xF && S == 1 &&
                    (op3 == 0x0 || op3 == 0x4 || op3 == 0x8 || op3 == 0xD)) {
           if (op3 == 0x0) {
@@ -640,7 +660,7 @@
           } else {
             opcode << "cmp.w";
           }
-          args << Rn << ", ThumbExpand(" << imm32 << ")";
+          args << Rn << ", #" << ThumbExpand(imm32);
         } else {
           switch (op3) {
             case 0x0: opcode << "and"; break;
@@ -658,7 +678,7 @@
           if (S == 1) {
             opcode << "s";
           }
-          args << Rd << ", " << Rn << ", ThumbExpand(" << imm32 << ")";
+          args << Rd << ", " << Rn << ", #" << ThumbExpand(imm32);
         }
       } else if ((instr & 0x8000) == 0 && (op2 & 0x20) != 0) {
         // Data-processing (plain binary immediate)
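
ThumbExpand implements the ARM ThumbExpandImm rule, so the disassembler can now print the decoded constant instead of the raw 12-bit field. A few worked examples of the cases above (values computed by hand from this function):

    ThumbExpand(0x0AB);  // bits [9:8] == 00: 0x000000AB
    ThumbExpand(0x1AB);  // bits [9:8] == 01: 0x00AB00AB
    ThumbExpand(0x2AB);  // bits [9:8] == 10: 0xAB00AB00
    ThumbExpand(0x3AB);  // bits [9:8] == 11: 0xABABABAB
    ThumbExpand(0x4D2);  // rotated case: 0x80|0x52 = 0xD2 rotated right by 9 -> 0x69000000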
diff --git a/runtime/disassembler_arm.h b/disassembler/disassembler_arm.h
similarity index 90%
rename from runtime/disassembler_arm.h
rename to disassembler/disassembler_arm.h
index cab9150..2e699ff 100644
--- a/runtime/disassembler_arm.h
+++ b/disassembler/disassembler_arm.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_DISASSEMBLER_ARM_H_
-#define ART_RUNTIME_DISASSEMBLER_ARM_H_
+#ifndef ART_DISASSEMBLER_DISASSEMBLER_ARM_H_
+#define ART_DISASSEMBLER_DISASSEMBLER_ARM_H_
 
 #include <vector>
 
@@ -48,4 +48,4 @@
 }  // namespace arm
 }  // namespace art
 
-#endif  // ART_RUNTIME_DISASSEMBLER_ARM_H_
+#endif  // ART_DISASSEMBLER_DISASSEMBLER_ARM_H_
diff --git a/runtime/disassembler_mips.cc b/disassembler/disassembler_mips.cc
similarity index 100%
rename from runtime/disassembler_mips.cc
rename to disassembler/disassembler_mips.cc
diff --git a/runtime/disassembler_mips.h b/disassembler/disassembler_mips.h
similarity index 87%
rename from runtime/disassembler_mips.h
rename to disassembler/disassembler_mips.h
index e248503..d386267 100644
--- a/runtime/disassembler_mips.h
+++ b/disassembler/disassembler_mips.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_DISASSEMBLER_MIPS_H_
-#define ART_RUNTIME_DISASSEMBLER_MIPS_H_
+#ifndef ART_DISASSEMBLER_DISASSEMBLER_MIPS_H_
+#define ART_DISASSEMBLER_DISASSEMBLER_MIPS_H_
 
 #include <vector>
 
@@ -37,4 +37,4 @@
 }  // namespace mips
 }  // namespace art
 
-#endif  // ART_RUNTIME_DISASSEMBLER_MIPS_H_
+#endif  // ART_DISASSEMBLER_DISASSEMBLER_MIPS_H_
diff --git a/runtime/disassembler_x86.cc b/disassembler/disassembler_x86.cc
similarity index 100%
rename from runtime/disassembler_x86.cc
rename to disassembler/disassembler_x86.cc
diff --git a/runtime/disassembler_x86.h b/disassembler/disassembler_x86.h
similarity index 87%
rename from runtime/disassembler_x86.h
rename to disassembler/disassembler_x86.h
index ff4322c..9adaff7 100644
--- a/runtime/disassembler_x86.h
+++ b/disassembler/disassembler_x86.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_DISASSEMBLER_X86_H_
-#define ART_RUNTIME_DISASSEMBLER_X86_H_
+#ifndef ART_DISASSEMBLER_DISASSEMBLER_X86_H_
+#define ART_DISASSEMBLER_DISASSEMBLER_X86_H_
 
 #include "disassembler.h"
 
@@ -35,4 +35,4 @@
 }  // namespace x86
 }  // namespace art
 
-#endif  // ART_RUNTIME_DISASSEMBLER_X86_H_
+#endif  // ART_DISASSEMBLER_DISASSEMBLER_X86_H_
diff --git a/oatdump/Android.mk b/oatdump/Android.mk
index a63b229..7cee00e 100644
--- a/oatdump/Android.mk
+++ b/oatdump/Android.mk
@@ -22,17 +22,17 @@
 include art/build/Android.executable.mk
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
-  $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libcutils,,target,ndebug))
+  $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libcutils libart-disassembler,art/disassembler,target,ndebug))
 endif
 ifeq ($(ART_BUILD_TARGET_DEBUG),true)
-  $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libcutils,,target,debug))
+  $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libcutils libartd-disassembler,art/disassembler,target,debug))
 endif
 
 ifeq ($(WITH_HOST_DALVIK),true)
   ifeq ($(ART_BUILD_HOST_NDEBUG),true)
-    $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),,,host,ndebug))
+    $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libart-disassembler,art/disassembler,host,ndebug))
   endif
   ifeq ($(ART_BUILD_HOST_DEBUG),true)
-    $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),,,host,debug))
+    $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libartd-disassembler,art/disassembler,host,debug))
   endif
 endif
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 304222a..6db5813 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1126,7 +1126,7 @@
     typedef SafeMap<std::string, SizeAndCount> SizeAndCountTable;
     SizeAndCountTable sizes_and_counts;
 
-    void Update(const std::string& descriptor, size_t object_bytes) {
+    void Update(const char* descriptor, size_t object_bytes) {
       SizeAndCountTable::iterator it = sizes_and_counts.find(descriptor);
       if (it != sizes_and_counts.end()) {
         it->second.bytes += object_bytes;
diff --git a/runtime/Android.mk b/runtime/Android.mk
index a0ae4bf..d8abbf1 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -38,10 +38,6 @@
 	dex_file.cc \
 	dex_file_verifier.cc \
 	dex_instruction.cc \
-	disassembler.cc \
-	disassembler_arm.cc \
-	disassembler_mips.cc \
-	disassembler_x86.cc \
 	elf_file.cc \
 	gc/allocator/dlmalloc.cc \
 	gc/accounting/card_table.cc \
@@ -64,6 +60,9 @@
 	instrumentation.cc \
 	intern_table.cc \
 	interpreter/interpreter.cc \
+	interpreter/interpreter_common.cc \
+	interpreter/interpreter_goto_table_impl.cc \
+	interpreter/interpreter_switch_impl.cc \
 	jdwp/jdwp_event.cc \
 	jdwp/jdwp_expand_buf.cc \
 	jdwp/jdwp_handler.cc \
@@ -244,6 +243,7 @@
 	jdwp/jdwp.h \
 	jdwp/jdwp_constants.h \
 	locks.h \
+	lock_word.h \
 	mirror/class.h \
 	thread.h \
 	thread_state.h \
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index ed3d476..cfffbea 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -25,7 +25,11 @@
 #define rSELF r9
 // Offset of field Thread::suspend_count_ verified in InitCpu
 #define THREAD_FLAGS_OFFSET 0
+// Offset of field Thread::card_table_ verified in InitCpu
+#define THREAD_CARD_TABLE_OFFSET 8
 // Offset of field Thread::exception_ verified in InitCpu
 #define THREAD_EXCEPTION_OFFSET 12
+// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
+#define THREAD_ID_OFFSET 60
 
 #endif  // ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_H_
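
The two new offsets follow the existing convention of being asserted against the real field layout in Thread::InitCpu (see the thread_arm.cc hunk below). A hypothetical compile-time equivalent of that runtime check, for illustration only:

    static_assert(OFFSETOF_MEMBER(Thread, thin_lock_thread_id_) == THREAD_ID_OFFSET,
                  "THREAD_ID_OFFSET must match the Thread field layout");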
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 9e6902d..352982f 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -42,10 +42,16 @@
 extern "C" void* art_quick_check_and_alloc_array(uint32_t, void*, int32_t);
 extern "C" void* art_quick_check_and_alloc_array_with_access_check(uint32_t, void*, int32_t);
 
+extern "C" void* art_quick_alloc_array_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_alloc_object_instrumented(uint32_t type_idx, void* method);
+extern "C" void* art_quick_alloc_object_with_access_check_instrumented(uint32_t type_idx, void* method);
+extern "C" void* art_quick_check_and_alloc_array_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_check_and_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
+
 // Cast entrypoints.
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
-extern "C" void art_quick_can_put_array_element(void*, void*);
 extern "C" void art_quick_check_cast(void*, void*);
 
 // DexCache entrypoints.
@@ -71,7 +77,10 @@
 extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
 extern "C" void* art_quick_get_obj_static(uint32_t);
 
-// FillArray entrypoint.
+// Array entrypoints.
+extern "C" void art_quick_aput_obj_with_null_and_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj_with_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj(void*, uint32_t, void*);
 extern "C" void art_quick_handle_fill_data(void*, void*);
 
 // Lock entrypoints.
@@ -133,6 +142,30 @@
 extern "C" void art_quick_throw_null_pointer_exception();
 extern "C" void art_quick_throw_stack_overflow(void*);
 
+static bool quick_alloc_entry_points_instrumented = false;
+
+void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
+  quick_alloc_entry_points_instrumented = instrumented;
+}
+
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
+  if (quick_alloc_entry_points_instrumented) {
+    qpoints->pAllocArray = art_quick_alloc_array_instrumented;
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check_instrumented;
+    qpoints->pAllocObject = art_quick_alloc_object_instrumented;
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check_instrumented;
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array_instrumented;
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check_instrumented;
+  } else {
+    qpoints->pAllocArray = art_quick_alloc_array;
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
+    qpoints->pAllocObject = art_quick_alloc_object;
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
+  }
+}
+
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
   // Interpreter
@@ -147,16 +180,10 @@
   ppoints->pPortableToInterpreterBridge = art_portable_to_interpreter_bridge;
 
   // Alloc
-  qpoints->pAllocArray = art_quick_alloc_array;
-  qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
-  qpoints->pAllocObject = art_quick_alloc_object;
-  qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
-  qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
-  qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
+  ResetQuickAllocEntryPoints(qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCanPutArrayElement = art_quick_can_put_array_element;
   qpoints->pCheckCast = art_quick_check_cast;
 
   // DexCache
@@ -179,7 +206,10 @@
   qpoints->pGet64Static = art_quick_get64_static;
   qpoints->pGetObjStatic = art_quick_get_obj_static;
 
-  // FillArray
+  // Array
+  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
+  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
+  qpoints->pAputObject = art_quick_aput_obj;
   qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
 
   // JNI
@@ -210,7 +240,7 @@
   qpoints->pD2l = art_d2l;
   qpoints->pF2l = art_f2l;
   qpoints->pLdiv = __aeabi_ldivmod;
-  qpoints->pLdivmod = __aeabi_ldivmod;  // result returned in r2:r3
+  qpoints->pLmod = __aeabi_ldivmod;  // result returned in r2:r3
   qpoints->pLmul = art_quick_mul_long;
   qpoints->pShlLong = art_quick_shl_long;
   qpoints->pShrLong = art_quick_shr_long;
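
ResetQuickAllocEntryPoints gives instrumentation a single switch for swapping every allocation entrypoint between the normal and *_instrumented stubs. A minimal sketch of a hypothetical caller, using only the two functions defined above:

    QuickEntryPoints qpoints;               // in practice these live inside each Thread
    SetQuickAllocEntryPointsInstrumented(true);
    ResetQuickAllocEntryPoints(&qpoints);   // now routes to the *_instrumented stubs
    SetQuickAllocEntryPointsInstrumented(false);
    ResetQuickAllocEntryPoints(&qpoints);   // back to the fast paths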
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index a77ce01..d073177 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -152,6 +152,7 @@
     mov r1, r9                      @ pass Thread::Current
     mov r2, sp                      @ pass SP
     b   \cxx_name                   @ \cxx_name(Thread*, SP)
+    bkpt
 END \c_name
 .endm
 
@@ -162,6 +163,7 @@
     mov r2, r9                      @ pass Thread::Current
     mov r3, sp                      @ pass SP
     b   \cxx_name                   @ \cxx_name(Thread*, SP)
+    bkpt
 END \c_name
 .endm
 
@@ -318,22 +320,67 @@
 END art_quick_handle_fill_data
 
     /*
-     * Entry from managed code that calls artLockObjectFromCode, may block for GC.
+     * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the
+     * possibly null object to lock.
      */
     .extern artLockObjectFromCode
 ENTRY art_quick_lock_object
+    cbz    r0, slow_lock
+retry_lock:
+    ldrex  r1, [r0, #LOCK_WORD_OFFSET]
+    ldrt   r2, [r9, #THREAD_ID_OFFSET]
+    cmp    r1, #0
+    bmi    slow_lock                  @ lock word contains a monitor
+    bne    already_thin
+    @ unlocked case - r2 holds thread id with count of 0
+    strex  r3, r2, [r0, #LOCK_WORD_OFFSET]
+    cbnz   r3, strex_fail             @ store failed, retry
+    bx lr
+strex_fail:
+    b retry_lock                      @ retry after the failed strex; reload and recheck r1/r2
+already_thin:
+    eor    r2, r1, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
+    uxth   r2, r2                     @ zero top 16 bits
+    cbnz   r2, slow_lock              @ thread ids differ -> contention, take the slow path
+                                      @ thread ids match -> recursive thin lock
+    adds   r2, r1, #65536             @ increment the count in the lock word, result in r2
+    bmi    slow_lock                  @ if the count overflows, take the slow path
+    str    r2, [r0, #LOCK_WORD_OFFSET] @ no need for strex as we hold the lock
+    bx lr
+slow_lock:
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case we block
     mov    r1, r9                     @ pass Thread::Current
     mov    r2, sp                     @ pass SP
     bl     artLockObjectFromCode      @ (Object* obj, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_ZERO
+    DELIVER_PENDING_EXCEPTION
 END art_quick_lock_object
 
     /*
      * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
+     * r0 holds the possibly null object to unlock.
      */
     .extern artUnlockObjectFromCode
 ENTRY art_quick_unlock_object
+    cbz    r0, slow_unlock
+    ldr    r1, [r0, #LOCK_WORD_OFFSET]
+    ldr    r2, [r9, #THREAD_ID_OFFSET]
+    cmp    r1, #0
+    bmi    slow_unlock                @ lock word contains a monitor
+    eor    r3, r1, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
+    uxth   r3, r3                     @ zero top 16 bits
+    cbnz   r3, slow_unlock            @ thread ids differ -> not our thin lock, take the slow path
+    cmp    r1, #65536
+    bpl    recursive_thin_unlock
+    @ transition to unlocked, r3 holds 0
+    str    r3, [r0, #LOCK_WORD_OFFSET]
+    bx     lr
+recursive_thin_unlock:
+    sub    r1, r1, #65536
+    str    r1, [r0, #LOCK_WORD_OFFSET]
+    bx     lr
+slow_unlock:
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case exception allocation triggers GC
     mov    r1, r9                     @ pass Thread::Current
     mov    r2, sp                     @ pass SP
@@ -344,33 +391,96 @@
 END art_quick_unlock_object
 
     /*
-     * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
+     * Entry from managed code that calls artIsAssignableFromCode and on failure calls
+     * artThrowClassCastException.
      */
-    .extern artCheckCastFromCode
+    .extern artThrowClassCastException
 ENTRY art_quick_check_cast
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME    @ save callee saves in case exception allocation triggers GC
-    mov    r2, r9                       @ pass Thread::Current
-    mov    r3, sp                       @ pass SP
-    bl     artCheckCastFromCode         @ (Class* a, Class* b, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_ZERO
-    DELIVER_PENDING_EXCEPTION
+    push {r0-r1, lr}                    @ save arguments, link register and pad
+    .save {r0-r1, lr}
+    .cfi_adjust_cfa_offset 12
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset lr, 8
+    sub sp, #4
+    .pad #4
+    .cfi_adjust_cfa_offset 4
+    bl artIsAssignableFromCode
+    cbz    r0, throw_class_cast_exception
+    add sp, #4
+    .cfi_adjust_cfa_offset -4
+    pop {r0-r1, pc}
+throw_class_cast_exception:
+    add sp, #4
+    .cfi_adjust_cfa_offset -4
+    pop {r0-r1, lr}
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  @ save all registers as basis for long jump context
+    mov r2, r9                      @ pass Thread::Current
+    mov r3, sp                      @ pass SP
+    b   artThrowClassCastException  @ (Class*, Class*, Thread*, SP)
+    bkpt
 END art_quick_check_cast
 
     /*
-     * Entry from managed code that calls artCanPutArrayElementFromCode and delivers exception on
-     * failure.
+     * Entry from managed code for array put operations of objects where the value being stored
+     * needs to be checked for compatibility.
+     * r0 = array, r1 = index, r2 = value
      */
-    .extern artCanPutArrayElementFromCode
-ENTRY art_quick_can_put_array_element
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME    @ save callee saves in case exception allocation triggers GC
-    mov    r2, r9                         @ pass Thread::Current
-    mov    r3, sp                         @ pass SP
-    bl     artCanPutArrayElementFromCode  @ (Object* element, Class* array_class, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_can_put_array_element
+ENTRY art_quick_aput_obj_with_null_and_bound_check
+    tst r0, r0
+    bne art_quick_aput_obj_with_bound_check
+    b art_quick_throw_null_pointer_exception
+END art_quick_aput_obj_with_null_and_bound_check
+
+ENTRY art_quick_aput_obj_with_bound_check
+    ldr r3, [r0, #ARRAY_LENGTH_OFFSET]
+    cmp r3, r1
+    bhi art_quick_aput_obj
+    mov r0, r1
+    mov r1, r3
+    b art_quick_throw_array_bounds
+END art_quick_aput_obj_with_bound_check
+
+ENTRY art_quick_aput_obj
+    cbz r2, do_aput_null
+    ldr r3, [r0, #CLASS_OFFSET]
+    ldr ip, [r2, #CLASS_OFFSET]
+    ldr r3, [r3, #CLASS_COMPONENT_TYPE_OFFSET]
+    cmp r3, ip  @ value's type == array's component type - trivial assignability
+    bne check_assignability
+do_aput:
+    add r3, r0, #OBJECT_ARRAY_DATA_OFFSET
+    str r2, [r3, r1, lsl #2]
+    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
+    lsr r0, r0, #7
+    strb r3, [r3, r0]
+    blx lr
+do_aput_null:
+    add r3, r0, #OBJECT_ARRAY_DATA_OFFSET
+    str r2, [r3, r1, lsl #2]
+    blx lr
+check_assignability:
+    push {r0-r2, lr}                 @ save arguments
+    mov r1, ip
+    mov r0, r3
+    bl artIsAssignableFromCode
+    cbz r0, throw_array_store_exception
+    pop {r0-r2, lr}
+    add r3, r0, #OBJECT_ARRAY_DATA_OFFSET
+    str r2, [r3, r1, lsl #2]
+    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
+    lsr r0, r0, #7
+    strb r3, [r3, r0]
+    blx lr
+throw_array_store_exception:
+    pop {r0-r2, lr}
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    mov r1, r2
+    mov r2, r9                   @ pass Thread::Current
+    mov r3, sp                   @ pass SP
+    b artThrowArrayStoreException  @ (Class*, Class*, Thread*, SP)
+    bkpt                         @ unreached
+END art_quick_aput_obj
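
The do_aput path above also performs the write-barrier card mark inline. In C++ terms (inferred from the assembly; the card table base is loaded from the Thread at THREAD_CARD_TABLE_OFFSET and its own low byte serves as the dirty value):

    void MarkCard(uint8_t* card_table, uintptr_t obj_addr) {
      // Each card covers 2^7 = 128 bytes; "strb r3, [r3, r0]" stores the low
      // byte of the (biased) table base into the card for the object.
      card_table[obj_addr >> 7] =
          static_cast<uint8_t>(reinterpret_cast<uintptr_t>(card_table));
    }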
 
     /*
      * Entry from managed code when uninitialized static storage, this stub will run the class
@@ -706,6 +816,17 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_alloc_object
 
+    .extern artAllocObjectFromCodeInstrumented
+ENTRY art_quick_alloc_object_instrumented
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    mov    r2, r9                     @ pass Thread::Current
+    mov    r3, sp                     @ pass SP
+    bl     artAllocObjectFromCodeInstrumented     @ (uint32_t type_idx, Method* method, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_alloc_object_instrumented
+
     /*
      * Called by managed code to allocate an object when the caller doesn't know whether it has
      * access to the created type.
@@ -721,6 +842,17 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_alloc_object_with_access_check
 
+    .extern artAllocObjectFromCodeWithAccessCheckInstrumented
+ENTRY art_quick_alloc_object_with_access_check_instrumented
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    mov    r2, r9                     @ pass Thread::Current
+    mov    r3, sp                     @ pass SP
+    bl     artAllocObjectFromCodeWithAccessCheckInstrumented  @ (uint32_t type_idx, Method* method, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_alloc_object_with_access_check_instrumented
+
     /*
      * Called by managed code to allocate an array.
      */
@@ -741,6 +873,23 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_alloc_array
 
+    .extern artAllocArrayFromCodeInstrumented
+ENTRY art_quick_alloc_array_instrumented
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    mov    r3, r9                     @ pass Thread::Current
+    mov    r12, sp
+    str    r12, [sp, #-16]!           @ expand the frame and pass SP
+    .pad #16
+    .cfi_adjust_cfa_offset 16
+    @ artAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t component_count, Thread*, SP)
+    bl     artAllocArrayFromCodeInstrumented
+    add    sp, #16                    @ strip the extra frame
+    .cfi_adjust_cfa_offset -16
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_alloc_array_instrumented
+
     /*
      * Called by managed code to allocate an array when the caller doesn't know whether it has
      * access to the created type.
@@ -762,6 +911,23 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_alloc_array_with_access_check
 
+    .extern artAllocArrayFromCodeWithAccessCheckInstrumented
+ENTRY art_quick_alloc_array_with_access_check_instrumented
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    mov    r3, r9                     @ pass Thread::Current
+    mov    r12, sp
+    str    r12, [sp, #-16]!           @ expand the frame and pass SP
+    .pad #16
+    .cfi_adjust_cfa_offset 16
+    @ artAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, component_count, Thread*, SP)
+    bl     artAllocArrayFromCodeWithAccessCheckInstrumented
+    add    sp, #16                    @ strip the extra frame
+    .cfi_adjust_cfa_offset -16
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_alloc_array_with_access_check_instrumented
+
     /*
      * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
      */
@@ -782,6 +948,23 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_check_and_alloc_array
 
+    .extern artCheckAndAllocArrayFromCodeInstrumented
+ENTRY art_quick_check_and_alloc_array_instrumented
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    mov    r3, r9                     @ pass Thread::Current
+    mov    r12, sp
+    str    r12, [sp, #-16]!           @ expand the frame and pass SP
+    .pad #16
+    .cfi_adjust_cfa_offset 16
+    @ artCheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t count, Thread*, SP)
+    bl     artCheckAndAllocArrayFromCodeInstrumented
+    add    sp, #16                    @ strip the extra frame
+    .cfi_adjust_cfa_offset -16
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_check_and_alloc_array_instrumented
+
     /*
      * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
      */
@@ -802,6 +985,23 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_check_and_alloc_array_with_access_check
 
+    .extern artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
+ENTRY art_quick_check_and_alloc_array_with_access_check_instrumented
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    mov    r3, r9                     @ pass Thread::Current
+    mov    r12, sp
+    str    r12, [sp, #-16]!           @ expand the frame and pass SP
+    .pad #16
+    .cfi_adjust_cfa_offset 16
+    @ artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, count, Thread*, SP)
+    bl     artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
+    add    sp, #16                    @ strip the extra frame
+    .cfi_adjust_cfa_offset -16
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_check_and_alloc_array_with_access_check_instrumented
+
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
      */
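
The new art_quick_lock_object / art_quick_unlock_object fast paths above encode the lock state in a single 32-bit word at LOCK_WORD_OFFSET. A sketch of the encoding they rely on (inferred from the assembly; the authoritative definition is the lock_word.h added to runtime/Android.mk below):

    //   bit 31 set   => the word holds a monitor ("fat" lock): take the slow path (bmi)
    //   bits 16..30  => recursive lock count, incremented by adding 65536
    //   bits 0..15   => owner's thin lock id; the whole word is 0 when unlocked
    bool IsThinLockedBySelf(uint32_t lock_word, uint32_t thread_id) {
      if (static_cast<int32_t>(lock_word) < 0) return false;  // monitor installed
      return ((lock_word ^ thread_id) & 0xFFFF) == 0;         // mirrors the eor/uxth test
    }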
diff --git a/runtime/arch/arm/thread_arm.cc b/runtime/arch/arm/thread_arm.cc
index ea908be..8c1efeb 100644
--- a/runtime/arch/arm/thread_arm.cc
+++ b/runtime/arch/arm/thread_arm.cc
@@ -23,7 +23,9 @@
 
 void Thread::InitCpu() {
   CHECK_EQ(THREAD_FLAGS_OFFSET, OFFSETOF_MEMBER(Thread, state_and_flags_));
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, OFFSETOF_MEMBER(Thread, card_table_));
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
+  CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h
index 9a66352..5307997 100644
--- a/runtime/arch/mips/asm_support_mips.h
+++ b/runtime/arch/mips/asm_support_mips.h
@@ -25,6 +25,8 @@
 #define rSELF $s1
 // Offset of field Thread::suspend_count_ verified in InitCpu
 #define THREAD_FLAGS_OFFSET 0
+// Offset of field Thread::card_table_ verified in InitCpu
+#define THREAD_CARD_TABLE_OFFSET 8
 // Offset of field Thread::exception_ verified in InitCpu
 #define THREAD_EXCEPTION_OFFSET 12
 
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 40d7cd9..cc975d75 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -41,10 +41,16 @@
 extern "C" void* art_quick_check_and_alloc_array(uint32_t, void*, int32_t);
 extern "C" void* art_quick_check_and_alloc_array_with_access_check(uint32_t, void*, int32_t);
 
+extern "C" void* art_quick_alloc_array_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_alloc_object_instrumented(uint32_t type_idx, void* method);
+extern "C" void* art_quick_alloc_object_with_access_check_instrumented(uint32_t type_idx, void* method);
+extern "C" void* art_quick_check_and_alloc_array_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_check_and_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
+
 // Cast entrypoints.
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
-extern "C" void art_quick_can_put_array_element(void*, void*);
 extern "C" void art_quick_check_cast(void*, void*);
 
 // DexCache entrypoints.
@@ -70,7 +76,10 @@
 extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
 extern "C" void* art_quick_get_obj_static(uint32_t);
 
-// FillArray entrypoint.
+// Array entrypoints.
+extern "C" void art_quick_aput_obj_with_null_and_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj_with_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj(void*, uint32_t, void*);
 extern "C" void art_quick_handle_fill_data(void*, void*);
 
 // Lock entrypoints.
@@ -82,9 +91,9 @@
 extern int32_t CmplDouble(double a, double b);
 extern int32_t CmpgFloat(float a, float b);
 extern int32_t CmplFloat(float a, float b);
-extern "C" int64_t artLmulFromCode(int64_t a, int64_t b);
-extern "C" int64_t artLdivFromCode(int64_t a, int64_t b);
-extern "C" int64_t artLdivmodFromCode(int64_t a, int64_t b);
+extern "C" int64_t artLmul(int64_t a, int64_t b);
+extern "C" int64_t artLdiv(int64_t a, int64_t b);
+extern "C" int64_t artLmod(int64_t a, int64_t b);
 
 // Math conversions.
 extern "C" int32_t __fixsfsi(float op1);      // FLOAT_TO_INT
@@ -134,6 +143,30 @@
 extern "C" void art_quick_throw_null_pointer_exception();
 extern "C" void art_quick_throw_stack_overflow(void*);
 
+static bool quick_alloc_entry_points_instrumented = false;
+
+void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
+  quick_alloc_entry_points_instrumented = instrumented;
+}
+
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
+  if (quick_alloc_entry_points_instrumented) {
+    qpoints->pAllocArray = art_quick_alloc_array_instrumented;
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check_instrumented;
+    qpoints->pAllocObject = art_quick_alloc_object_instrumented;
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check_instrumented;
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array_instrumented;
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check_instrumented;
+  } else {
+    qpoints->pAllocArray = art_quick_alloc_array;
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
+    qpoints->pAllocObject = art_quick_alloc_object;
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
+  }
+}
+
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
   // Interpreter
@@ -148,16 +181,10 @@
   ppoints->pPortableToInterpreterBridge = art_portable_to_interpreter_bridge;
 
   // Alloc
-  qpoints->pAllocArray = art_quick_alloc_array;
-  qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
-  qpoints->pAllocObject = art_quick_alloc_object;
-  qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
-  qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
-  qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
+  ResetQuickAllocEntryPoints(qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCanPutArrayElement = art_quick_can_put_array_element;
   qpoints->pCheckCast = art_quick_check_cast;
 
   // DexCache
@@ -180,7 +207,10 @@
   qpoints->pGet64Static = art_quick_get64_static;
   qpoints->pGetObjStatic = art_quick_get_obj_static;
 
-  // FillArray
+  // Array
+  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
+  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
+  qpoints->pAputObject = art_quick_aput_obj;
   qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
 
   // JNI
@@ -209,9 +239,9 @@
   qpoints->pIdivmod = NULL;
   qpoints->pD2l = art_d2l;
   qpoints->pF2l = art_f2l;
-  qpoints->pLdiv = artLdivFromCode;
-  qpoints->pLdivmod = artLdivmodFromCode;
-  qpoints->pLmul = artLmulFromCode;
+  qpoints->pLdiv = artLdiv;
+  qpoints->pLmod = artLmod;
+  qpoints->pLmul = artLmul;
   qpoints->pShlLong = art_quick_shl_long;
   qpoints->pShrLong = art_quick_shr_long;
   qpoints->pUshrLong = art_quick_ushr_long;
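
The MIPS table mirrors the ARM changes: allocation entrypoints go through the same SetQuickAllocEntryPointsInstrumented / ResetQuickAllocEntryPoints pair, pCanPutArrayElement is replaced by the three pAputObject* entries, and the long-arithmetic helpers drop their FromCode suffix (artLdivFromCode/artLdivmodFromCode/artLmulFromCode become artLdiv/artLmod/artLmul) to match the renamed pLmod entry point.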
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 3d63ccc..7780bb3 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -283,6 +283,7 @@
     .extern artThrowNullPointerExceptionFromCode
 ENTRY art_quick_throw_null_pointer_exception
     GENERATE_GLOBAL_POINTER
+art_quick_throw_null_pointer_exception_gp_set:
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     move $a0, rSELF                 # pass Thread::Current
     la   $t9, artThrowNullPointerExceptionFromCode
@@ -309,6 +310,7 @@
     .extern artThrowArrayBoundsFromCode
 ENTRY art_quick_throw_array_bounds
     GENERATE_GLOBAL_POINTER
+art_quick_throw_array_bounds_gp_set:
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     move $a2, rSELF                 # pass Thread::Current
     la   $t9, artThrowArrayBoundsFromCode
@@ -481,11 +483,13 @@
     .extern artLockObjectFromCode
 ENTRY art_quick_lock_object
     GENERATE_GLOBAL_POINTER
+    beqz    $a0, art_quick_throw_null_pointer_exception_gp_set
+    nop
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME      # save callee saves in case we block
     move    $a1, rSELF                    # pass Thread::Current
     jal     artLockObjectFromCode         # (Object* obj, Thread*, $sp)
     move    $a2, $sp                      # pass $sp
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RETURN_IF_ZERO
 END art_quick_lock_object
 
     /*
@@ -494,6 +498,8 @@
     .extern artUnlockObjectFromCode
 ENTRY art_quick_unlock_object
     GENERATE_GLOBAL_POINTER
+    beqz    $a0, art_quick_throw_null_pointer_exception_gp_set
+    nop
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
     move    $a1, rSELF                # pass Thread::Current
     jal     artUnlockObjectFromCode   # (Object* obj, Thread*, $sp)
@@ -504,29 +510,116 @@
     /*
-     * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
+     * Entry from managed code that calls artIsAssignableFromCode and on failure calls
+     * artThrowClassCastException.
      */
-    .extern artCheckCastFromCode
+    .extern artThrowClassCastException
 ENTRY art_quick_check_cast
     GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
-    move    $a2, rSELF                # pass Thread::Current
-    jal     artCheckCastFromCode      # (Class* a, Class* b, Thread*, $sp)
-    move    $a3, $sp                  # pass $sp
-    RETURN_IF_ZERO
+    addiu  $sp, $sp, -16
+    .cfi_adjust_cfa_offset 16
+    sw     $ra, 12($sp)
+    .cfi_rel_offset 31, 12
+    sw     $t9, 8($sp)
+    sw     $a1, 4($sp)
+    sw     $a0, 0($sp)
+    jal    artIsAssignableFromCode
+    nop
+    beqz   $v0, throw_class_cast_exception
+    lw     $ra, 12($sp)
+    jr     $ra
+    addiu  $sp, $sp, 16
+    .cfi_adjust_cfa_offset -16
+throw_class_cast_exception:
+    lw     $t9, 8($sp)
+    lw     $a1, 4($sp)
+    lw     $a0, 0($sp)
+    addiu  $sp, $sp, 16
+    .cfi_adjust_cfa_offset -16
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    move $a2, rSELF                 # pass Thread::Current
+    la   $t9, artThrowClassCastException
+    jr   $t9                        # artThrowClassCastException (Class*, Class*, Thread*, SP)
+    move $a3, $sp                   # pass $sp
 END art_quick_check_cast
 
     /*
-     * Entry from managed code that calls artCanPutArrayElementFromCode and delivers exception on
-     * failure.
+     * Entry from managed code for array put operations of objects where the value being stored
+     * needs to be checked for compatibility.
+     * a0 = array, a1 = index, a2 = value
      */
-    .extern artCanPutArrayElementFromCode
-ENTRY art_quick_can_put_array_element
+ENTRY art_quick_aput_obj_with_null_and_bound_check
     GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case exception allocation triggers GC
-    move    $a2, rSELF                     # pass Thread::Current
-    jal     artCanPutArrayElementFromCode  # (Object* element, Class* array_class, Thread*, $sp)
-    move    $a3, $sp                       # pass $sp
-    RETURN_IF_ZERO
-END art_quick_can_put_array_element
+    bnez    $a0, art_quick_aput_obj_with_bound_check_gp_set
+    nop
+    b art_quick_throw_null_pointer_exception_gp_set
+    nop
+END art_quick_aput_obj_with_null_and_bound_check
+
+ENTRY art_quick_aput_obj_with_bound_check
+    GENERATE_GLOBAL_POINTER
+art_quick_aput_obj_with_bound_check_gp_set:
+    lw $t0, ARRAY_LENGTH_OFFSET($a0)
+    sltu $t1, $a1, $t0
+    bnez $t1, art_quick_aput_obj_gp_set
+    nop
+    move $a0, $a1
+    b art_quick_throw_array_bounds_gp_set
+    move $a1, $t0
+END art_quick_aput_obj_with_bound_check
+
+ENTRY art_quick_aput_obj
+    GENERATE_GLOBAL_POINTER
+art_quick_aput_obj_gp_set:
+    beqz $a2, do_aput_null
+    nop
+    lw $t0, CLASS_OFFSET($a0)
+    lw $t1, CLASS_OFFSET($a2)
+    lw $t0, CLASS_COMPONENT_TYPE_OFFSET($t0)
+    bne $t1, $t0, check_assignability  # value's type != array's component type -> check assignability
+    nop
+do_aput:
+    sll $a1, $a1, 2
+    add $t0, $a0, $a1
+    sw  $a2, OBJECT_ARRAY_DATA_OFFSET($t0)
+    lw  $t0, THREAD_CARD_TABLE_OFFSET(rSELF)
+    srl $t1, $a0, 7
+    add $t1, $t1, $t0
+    sb  $t0, ($t1)
+    jr  $ra
+    nop
+do_aput_null:
+    sll $a1, $a1, 2
+    add $t0, $a0, $a1
+    sw  $a2, OBJECT_ARRAY_DATA_OFFSET($t0)
+    jr  $ra
+    nop
+check_assignability:
+    addiu  $sp, $sp, -32
+    .cfi_adjust_cfa_offset 32
+    sw     $ra, 28($sp)
+    .cfi_rel_offset 31, 28
+    sw     $t9, 12($sp)
+    sw     $a2, 8($sp)
+    sw     $a1, 4($sp)
+    sw     $a0, 0($sp)
+    move   $a1, $t1
+    move   $a0, $t0
+    jal    artIsAssignableFromCode  # (Class*, Class*)
+    nop
+    lw     $ra, 28($sp)
+    lw     $t9, 12($sp)
+    lw     $a2, 8($sp)
+    lw     $a1, 4($sp)
+    lw     $a0, 0($sp)
+    add    $sp, 32
+    .cfi_adjust_cfa_offset -32
+    bnez   $v0, do_aput
+    nop
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    move $a1, $a2
+    move $a2, rSELF                 # pass Thread::Current
+    la   $t9, artThrowArrayStoreException
+    jr   $t9                        # artThrowArrayStoreException(Class*, Class*, Thread*, SP)
+    move $a3, $sp                   # pass $sp
+END art_quick_aput_obj
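
A MIPS-specific detail in the stubs above: the instruction immediately after every branch or jump executes in the branch delay slot, which is why argument moves such as "move $a3, $sp" deliberately follow the "jr $t9" or "jal" they feed, and why untaken paths are padded with "nop".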
 
     /*
      * Entry from managed code when uninitialized static storage, this stub will run the class
@@ -770,6 +863,16 @@
     RETURN_IF_NONZERO
 END art_quick_alloc_object
 
+    .extern artAllocObjectFromCodeInstrumented
+ENTRY art_quick_alloc_object_instrumented
+    GENERATE_GLOBAL_POINTER
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    move    $a2, rSELF                # pass Thread::Current
+    jal     artAllocObjectFromCodeInstrumented    # (uint32_t type_idx, Method* method, Thread*, $sp)
+    move    $a3, $sp                  # pass $sp
+    RETURN_IF_NONZERO
+END art_quick_alloc_object_instrumented
+
     /*
      * Called by managed code to allocate an object when the caller doesn't know whether it has
      * access to the created type.
@@ -784,6 +887,16 @@
     RETURN_IF_NONZERO
 END art_quick_alloc_object_with_access_check
 
+    .extern artAllocObjectFromCodeWithAccessCheckInstrumented
+ENTRY art_quick_alloc_object_with_access_check_instrumented
+    GENERATE_GLOBAL_POINTER
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    move    $a2, rSELF                # pass Thread::Current
+    jal     artAllocObjectFromCodeWithAccessCheckInstrumented  # (uint32_t type_idx, Method* method, Thread*, $sp)
+    move    $a3, $sp                  # pass $sp
+    RETURN_IF_NONZERO
+END art_quick_alloc_object_with_access_check_instrumented
+
     /*
      * Called by managed code to allocate an array.
      */
@@ -798,6 +911,17 @@
     RETURN_IF_NONZERO
 END art_quick_alloc_array
 
+    .extern artAllocArrayFromCodeInstrumented
+ENTRY art_quick_alloc_array_instrumented
+    GENERATE_GLOBAL_POINTER
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    move    $a3, rSELF                # pass Thread::Current
+    # artAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t component_count, Thread*, $sp)
+    jal     artAllocArrayFromCodeInstrumented
+    sw      $sp, 16($sp)              # pass $sp
+    RETURN_IF_NONZERO
+END art_quick_alloc_array_instrumented
+
     /*
      * Called by managed code to allocate an array when the caller doesn't know whether it has
      * access to the created type.
@@ -813,6 +937,17 @@
     RETURN_IF_NONZERO
 END art_quick_alloc_array_with_access_check
 
+    .extern artAllocArrayFromCodeWithAccessCheckInstrumented
+ENTRY art_quick_alloc_array_with_access_check_instrumented
+    GENERATE_GLOBAL_POINTER
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    move    $a3, rSELF                # pass Thread::Current
+    # artAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, component_count, Thread*, $sp)
+    jal     artAllocArrayFromCodeWithAccessCheckInstrumented
+    sw      $sp, 16($sp)              # pass $sp
+    RETURN_IF_NONZERO
+END art_quick_alloc_array_with_access_check_instrumented
+
     /*
      * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
      */
@@ -827,6 +962,17 @@
     RETURN_IF_NONZERO
 END art_quick_check_and_alloc_array
 
+    .extern artCheckAndAllocArrayFromCodeInstrumented
+ENTRY art_quick_check_and_alloc_array_instrumented
+    GENERATE_GLOBAL_POINTER
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    move    $a3, rSELF                # pass Thread::Current
+    # artCheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t count, Thread*, $sp)
+    jal     artCheckAndAllocArrayFromCodeInstrumented
+    sw      $sp, 16($sp)              # pass $sp
+    RETURN_IF_NONZERO
+END art_quick_check_and_alloc_array_instrumented
+
     /*
      * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
      */
@@ -841,6 +987,17 @@
     RETURN_IF_NONZERO
 END art_quick_check_and_alloc_array_with_access_check
 
+    .extern artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
+ENTRY art_quick_check_and_alloc_array_with_access_check_instrumented
+    GENERATE_GLOBAL_POINTER
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    move    $a3, rSELF                # pass Thread::Current
+    # artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, count, Thread*, $sp)
+    jal     artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
+    sw      $sp, 16($sp)              # pass $sp
+    RETURN_IF_NONZERO
+END art_quick_check_and_alloc_array_with_access_check_instrumented
+
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
      */
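
Taken together, the three aput stubs above implement one checked array-store fast path: null check, unsigned bounds check, component-type check with an artIsAssignableFromCode fallback, then the store plus a card mark (skipped for null stores). A rough C++ restatement of that logic, using illustrative layouts and helper names rather than the runtime's real mirror types:

    #include <cstdint>

    struct Class { Class* component_type_; };        // illustrative layout only
    struct Object { Class* klass_; uint32_t lock_word_; };
    struct ObjectArray {
      Class* klass_;
      uint32_t lock_word_;
      int32_t length_;
      Object* elements_[1];                          // data begins at OBJECT_ARRAY_DATA_OFFSET
    };

    // Stand-ins for the runtime helpers the stubs branch or call out to.
    bool IsAssignable(Class* dest, Class* src);      // artIsAssignableFromCode
    void ThrowNullPointerException();
    void ThrowArrayBounds(int32_t index, int32_t length);
    void ThrowArrayStoreException(ObjectArray* array, Object* value);
    void MarkCard(ObjectArray* array);               // the sb/movb card mark, sketched further down

    void AputObject(ObjectArray* array, int32_t index, Object* value) {
      if (array == nullptr) {
        ThrowNullPointerException();                 // _with_null_and_bound_check entry
      } else if (static_cast<uint32_t>(index) >= static_cast<uint32_t>(array->length_)) {
        ThrowArrayBounds(index, array->length_);     // unsigned compare, as in sltu/jb
      } else if (value == nullptr) {
        array->elements_[index] = nullptr;           // null store: no type check, no card mark
      } else if (value->klass_ == array->klass_->component_type_ ||
                 IsAssignable(array->klass_->component_type_, value->klass_)) {
        array->elements_[index] = value;
        MarkCard(array);                             // tell the GC this array is dirty
      } else {
        ThrowArrayStoreException(array, value);
      }
    }
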
diff --git a/runtime/arch/mips/thread_mips.cc b/runtime/arch/mips/thread_mips.cc
index 7364de0..bd54549 100644
--- a/runtime/arch/mips/thread_mips.cc
+++ b/runtime/arch/mips/thread_mips.cc
@@ -23,6 +23,7 @@
 
 void Thread::InitCpu() {
   CHECK_EQ(THREAD_FLAGS_OFFSET, OFFSETOF_MEMBER(Thread, state_and_flags_));
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, OFFSETOF_MEMBER(Thread, card_table_));
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
 }
 
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index 1092910..e817ff7 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -21,7 +21,11 @@
 
 // Offset of field Thread::self_ verified in InitCpu
 #define THREAD_SELF_OFFSET 40
+// Offset of field Thread::card_table_ verified in InitCpu
+#define THREAD_CARD_TABLE_OFFSET 8
 // Offset of field Thread::exception_ verified in InitCpu
 #define THREAD_EXCEPTION_OFFSET 12
+// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
+#define THREAD_ID_OFFSET 60
 
 #endif  // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
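
The new THREAD_CARD_TABLE_OFFSET is what lets the aput stubs mark GC cards without calling out of assembly: shift the object address right by 7 (128-byte cards) and store one byte at card-table-base + offset. Reusing the base register as the stored value (sb $t0 above, movb %dl below) only works if the base is pre-biased so that its own low byte equals the dirty-card value; that biasing is an inference from the stubs, sketched here:

    #include <cstdint>

    constexpr size_t kCardShift = 7;  // 128-byte cards, matching the srl/shrl by 7

    // 'biased_base' plays the role of Thread::card_table_.
    inline void MarkCard(uint8_t* biased_base, const void* obj) {
      biased_base[reinterpret_cast<uintptr_t>(obj) >> kCardShift] =
          static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_base));
    }
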
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index abc2990..89dd1b8 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -40,10 +40,16 @@
 extern "C" void* art_quick_check_and_alloc_array(uint32_t, void*, int32_t);
 extern "C" void* art_quick_check_and_alloc_array_with_access_check(uint32_t, void*, int32_t);
 
+extern "C" void* art_quick_alloc_array_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_alloc_object_instrumented(uint32_t type_idx, void* method);
+extern "C" void* art_quick_alloc_object_with_access_check_instrumented(uint32_t type_idx, void* method);
+extern "C" void* art_quick_check_and_alloc_array_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_check_and_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
+
 // Cast entrypoints.
 extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
                                                 const mirror::Class* ref_class);
-extern "C" void art_quick_can_put_array_element(void*, void*);
 extern "C" void art_quick_check_cast(void*, void*);
 
 // DexCache entrypoints.
@@ -66,7 +72,10 @@
 extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
 extern "C" void* art_quick_get_obj_static(uint32_t);
 
-// FillArray entrypoint.
+// Array entrypoints.
+extern "C" void art_quick_aput_obj_with_null_and_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj_with_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj(void*, uint32_t, void*);
 extern "C" void art_quick_handle_fill_data(void*, void*);
 
 // Lock entrypoints.
@@ -82,7 +91,7 @@
 extern "C" int64_t art_quick_f2l(float);
 extern "C" int32_t art_quick_idivmod(int32_t, int32_t);
 extern "C" int64_t art_quick_ldiv(int64_t, int64_t);
-extern "C" int64_t art_quick_ldivmod(int64_t, int64_t);
+extern "C" int64_t art_quick_lmod(int64_t, int64_t);
 extern "C" int64_t art_quick_lmul(int64_t, int64_t);
 extern "C" uint64_t art_quick_lshl(uint64_t, uint32_t);
 extern "C" uint64_t art_quick_lshr(uint64_t, uint32_t);
@@ -116,6 +125,30 @@
 extern "C" void art_quick_throw_null_pointer_exception();
 extern "C" void art_quick_throw_stack_overflow(void*);
 
+static bool quick_alloc_entry_points_instrumented = false;
+
+void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
+  quick_alloc_entry_points_instrumented = instrumented;
+}
+
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
+  if (quick_alloc_entry_points_instrumented) {
+    qpoints->pAllocArray = art_quick_alloc_array_instrumented;
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check_instrumented;
+    qpoints->pAllocObject = art_quick_alloc_object_instrumented;
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check_instrumented;
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array_instrumented;
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check_instrumented;
+  } else {
+    qpoints->pAllocArray = art_quick_alloc_array;
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
+    qpoints->pAllocObject = art_quick_alloc_object;
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
+  }
+}
+
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
   // Interpreter
@@ -130,16 +163,10 @@
   ppoints->pPortableToInterpreterBridge = art_portable_to_interpreter_bridge;
 
   // Alloc
-  qpoints->pAllocArray = art_quick_alloc_array;
-  qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
-  qpoints->pAllocObject = art_quick_alloc_object;
-  qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
-  qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
-  qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
+  ResetQuickAllocEntryPoints(qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = art_quick_is_assignable;
-  qpoints->pCanPutArrayElement = art_quick_can_put_array_element;
   qpoints->pCheckCast = art_quick_check_cast;
 
   // DexCache
@@ -162,7 +189,10 @@
   qpoints->pGet64Static = art_quick_get64_static;
   qpoints->pGetObjStatic = art_quick_get_obj_static;
 
-  // FillArray
+  // Array
+  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
+  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
+  qpoints->pAputObject = art_quick_aput_obj;
   qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
 
   // JNI
@@ -192,7 +222,7 @@
   qpoints->pD2l = art_quick_d2l;
   qpoints->pF2l = art_quick_f2l;
   qpoints->pLdiv = art_quick_ldiv;
-  qpoints->pLdivmod = art_quick_ldivmod;
+  qpoints->pLmod = art_quick_lmod;
   qpoints->pLmul = art_quick_lmul;
   qpoints->pShlLong = art_quick_lshl;
   qpoints->pShrLong = art_quick_lshr;
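
The new SetQuickAllocEntryPointsInstrumented/ResetQuickAllocEntryPoints pair above gives callers a single switch for swapping all six allocation entrypoints between the plain and instrumented stubs. A sketch of a call site; only the two declared functions come from this patch, the wrapper and its name are hypothetical:

    struct QuickEntryPoints;  // per-thread entrypoint table, defined by the runtime

    void SetQuickAllocEntryPointsInstrumented(bool instrumented);  // from this patch
    void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);    // from this patch

    // Illustrative wrapper: flip the flag, then rewrite the six alloc slots.
    void ToggleAllocInstrumentation(QuickEntryPoints* qpoints, bool enable) {
      SetQuickAllocEntryPointsInstrumented(enable);
      ResetQuickAllocEntryPoints(qpoints);
    }
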
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index dbf552f..9fce72f 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -389,26 +389,205 @@
 THREE_ARG_DOWNCALL art_quick_check_and_alloc_array, artCheckAndAllocArrayFromCode, RETURN_IF_EAX_NOT_ZERO
 THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check, artCheckAndAllocArrayFromCodeWithAccessCheck, RETURN_IF_EAX_NOT_ZERO
 
+TWO_ARG_DOWNCALL art_quick_alloc_object_instrumented, artAllocObjectFromCodeInstrumented, RETURN_IF_EAX_NOT_ZERO
+TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check_instrumented, artAllocObjectFromCodeWithAccessCheckInstrumented, RETURN_IF_EAX_NOT_ZERO
+THREE_ARG_DOWNCALL art_quick_alloc_array_instrumented, artAllocArrayFromCodeInstrumented, RETURN_IF_EAX_NOT_ZERO
+THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check_instrumented, artAllocArrayFromCodeWithAccessCheckInstrumented, RETURN_IF_EAX_NOT_ZERO
+THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_instrumented, artCheckAndAllocArrayFromCodeInstrumented, RETURN_IF_EAX_NOT_ZERO
+THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check_instrumented, artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented, RETURN_IF_EAX_NOT_ZERO
+
 TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_EAX_NOT_ZERO
 TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_EAX_NOT_ZERO
 TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_EAX_NOT_ZERO
 TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_EAX_NOT_ZERO
 
-ONE_ARG_DOWNCALL art_quick_lock_object, artLockObjectFromCode, ret
-ONE_ARG_DOWNCALL art_quick_unlock_object, artUnlockObjectFromCode, RETURN_IF_EAX_ZERO
-
 TWO_ARG_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
 
+DEFINE_FUNCTION art_quick_lock_object
+    testl %eax, %eax                      // null check object/eax
+    jz   slow_lock
+retry_lock:
+    movl LOCK_WORD_OFFSET(%eax), %ecx     // ecx := lock word
+    movl %fs:THREAD_ID_OFFSET, %edx       // edx := thread id
+    test %ecx, %ecx
+    js   slow_lock                        // lock word is negative, i.e. contains a monitor
+    jnz  already_thin                     // lock word contains a thin lock
+    // unlocked case - %edx holds thread id with count of 0
+    movl %eax, %ecx                       // remember object in case of retry
+    xor  %eax, %eax                       // eax == 0 for comparison with lock word in cmpxchg
+    lock cmpxchg  %edx, LOCK_WORD_OFFSET(%ecx)
+    jnz  cmpxchg_fail                     // cmpxchg failed, retry
+    ret
+cmpxchg_fail:
+    movl %ecx, %eax                       // restore eax
+    jmp  retry_lock
+already_thin:
+    cmpw %cx, %dx                         // do we hold the lock already?
+    jne  slow_lock
+    addl LITERAL(65536), %ecx             // increment recursion count
+    js   slow_lock                        // count overflowed into the sign bit so go slow
+    movl %ecx, LOCK_WORD_OFFSET(%eax)     // update lockword, cmpxchg not necessary as we hold lock
+    ret
+slow_lock:
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
+    mov %esp, %edx                // remember SP
+    // Outgoing argument set up
+    PUSH eax                      // push padding
+    PUSH edx                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    .cfi_adjust_cfa_offset 4
+    PUSH eax                      // pass object
+    call artLockObjectFromCode    // artLockObjectFromCode(object, Thread*, SP)
+    addl MACRO_LITERAL(16), %esp  // pop arguments
+    .cfi_adjust_cfa_offset -16
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RETURN_IF_EAX_ZERO
+END_FUNCTION art_quick_lock_object
+
+DEFINE_FUNCTION art_quick_unlock_object
+    testl %eax, %eax                      // null check object/eax
+    jz   slow_unlock
+    movl LOCK_WORD_OFFSET(%eax), %ecx     // ecx := lock word
+    movl %fs:THREAD_ID_OFFSET, %edx       // edx := thread id
+    test %ecx, %ecx
+    js   slow_unlock                      // lock word is negative, i.e. contains a monitor
+    cmpw %cx, %dx                         // does the thread id match?
+    jne  slow_unlock
+    cmpl LITERAL(65536), %ecx
+    jae  recursive_thin_unlock
+    movl LITERAL(0), LOCK_WORD_OFFSET(%eax)
+    ret
+recursive_thin_unlock:
+    subl LITERAL(65536), %ecx
+    mov  %ecx, LOCK_WORD_OFFSET(%eax)
+    ret
+slow_unlock:
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
+    mov %esp, %edx                // remember SP
+    // Outgoing argument set up
+    PUSH eax                      // push padding
+    PUSH edx                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    .cfi_adjust_cfa_offset 4
+    PUSH eax                      // pass object
+    call artUnlockObjectFromCode    // artUnlockObjectFromCode(object, Thread*, SP)
+    addl MACRO_LITERAL(16), %esp  // pop arguments
+    .cfi_adjust_cfa_offset -16
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RETURN_IF_EAX_ZERO
+END_FUNCTION art_quick_unlock_object
+
 DEFINE_FUNCTION art_quick_is_assignable
     PUSH eax                     // alignment padding
-    PUSH ecx                    // pass arg2
-    PUSH eax                     // pass arg1
-    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b, Thread*, SP)
+    PUSH ecx                     // pass arg2 - obj->klass
+    PUSH eax                     // pass arg1 - checked class
+    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
     addl LITERAL(12), %esp        // pop arguments
     .cfi_adjust_cfa_offset -12
     ret
 END_FUNCTION art_quick_is_assignable
 
+DEFINE_FUNCTION art_quick_check_cast
+    PUSH eax                     // alignment padding
+    PUSH ecx                     // pass arg2 - obj->klass
+    PUSH eax                     // pass arg1 - checked class
+    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
+    testl %eax, %eax
+    jz 1f                         // jump forward if not assignable
+    addl LITERAL(12), %esp        // pop arguments
+    .cfi_adjust_cfa_offset -12
+    ret
+1:
+    POP eax                       // pop arguments
+    POP ecx
+    addl LITERAL(4), %esp
+    .cfi_adjust_cfa_offset -4
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov %esp, %edx
+    // Outgoing argument set up
+    PUSH edx                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    .cfi_adjust_cfa_offset 4
+    PUSH ecx                      // pass arg2
+    PUSH eax                      // pass arg1
+    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*, SP)
+    int3                          // unreached
+END_FUNCTION art_quick_check_cast
+
+    /*
+     * Entry from managed code for array put operations of objects where the value being stored
+     * needs to be checked for compatibility.
+     * eax = array, ecx = index, edx = value
+     */
+DEFINE_FUNCTION art_quick_aput_obj_with_null_and_bound_check
+    testl %eax, %eax
+    jnz art_quick_aput_obj_with_bound_check
+    jmp art_quick_throw_null_pointer_exception
+END_FUNCTION art_quick_aput_obj_with_null_and_bound_check
+
+DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
+    movl ARRAY_LENGTH_OFFSET(%eax), %ebx
+    cmpl %ebx, %ecx
+    jb art_quick_aput_obj
+    mov %ecx, %eax
+    mov %ebx, %ecx
+    jmp art_quick_throw_array_bounds
+END_FUNCTION art_quick_aput_obj_with_bound_check
+
+DEFINE_FUNCTION art_quick_aput_obj
+    test %edx, %edx              // store of null
+    jz do_aput_null
+    movl CLASS_OFFSET(%eax), %ebx
+    movl CLASS_COMPONENT_TYPE_OFFSET(%ebx), %ebx
+    cmpl CLASS_OFFSET(%edx), %ebx // value's type == array's component type - trivial assignability
+    jne check_assignability
+do_aput:
+    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)
+    movl %fs:THREAD_CARD_TABLE_OFFSET, %edx  // edx := biased card table base
+    shrl LITERAL(7), %eax        // eax := object address >> card shift
+    movb %dl, (%edx, %eax)       // dirty the card with the base's low byte
+    ret
+do_aput_null:
+    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)
+    ret
+check_assignability:
+    PUSH eax                     // save arguments
+    PUSH ecx
+    PUSH edx
+    subl LITERAL(8), %esp        // alignment padding
+    .cfi_adjust_cfa_offset 8
+    pushl CLASS_OFFSET(%edx)     // pass arg2 - type of the value to be stored
+    .cfi_adjust_cfa_offset 4
+    PUSH ebx                     // pass arg1 - component type of the array
+    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
+    addl LITERAL(16), %esp       // pop arguments
+    .cfi_adjust_cfa_offset -16
+    testl %eax, %eax
+    jz   throw_array_store_exception
+    POP  edx
+    POP  ecx
+    POP  eax
+    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)  // do the aput
+    movl %fs:THREAD_CARD_TABLE_OFFSET, %edx  // edx := biased card table base
+    shrl LITERAL(7), %eax        // eax := object address >> card shift
+    movb %dl, (%edx, %eax)       // dirty the card with the base's low byte
+    ret
+throw_array_store_exception:
+    POP  edx
+    POP  ecx
+    POP  eax
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov %esp, %ecx
+    // Outgoing argument set up
+    PUSH ecx                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    .cfi_adjust_cfa_offset 4
+    PUSH edx                      // pass arg2 - value
+    PUSH eax                      // pass arg1 - array
+    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*, SP)
+    int3                          // unreached
+END_FUNCTION art_quick_aput_obj
+
 DEFINE_FUNCTION art_quick_memcpy
     PUSH edx                      // pass arg3
     PUSH ecx                      // pass arg2
@@ -419,9 +598,6 @@
     ret
 END_FUNCTION art_quick_memcpy
 
-TWO_ARG_DOWNCALL art_quick_check_cast, artCheckCastFromCode, RETURN_IF_EAX_ZERO
-TWO_ARG_DOWNCALL art_quick_can_put_array_element, artCanPutArrayElementFromCode, RETURN_IF_EAX_ZERO
-
 NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
 
 DEFINE_FUNCTION art_quick_fmod
@@ -508,30 +684,30 @@
 END_FUNCTION art_quick_idivmod
 
 DEFINE_FUNCTION art_quick_ldiv
-    subl LITERAL(12), %esp        // alignment padding
+    subl LITERAL(12), %esp       // alignment padding
     .cfi_adjust_cfa_offset 12
     PUSH ebx                     // pass arg4 b.hi
     PUSH edx                     // pass arg3 b.lo
     PUSH ecx                     // pass arg2 a.hi
-    PUSH eax                      // pass arg1 a.lo
-    call SYMBOL(artLdivFromCode)  // (jlong a, jlong b)
-    addl LITERAL(28), %esp        // pop arguments
+    PUSH eax                     // pass arg1 a.lo
+    call SYMBOL(artLdiv)         // (jlong a, jlong b)
+    addl LITERAL(28), %esp       // pop arguments
     .cfi_adjust_cfa_offset -28
     ret
 END_FUNCTION art_quick_ldiv
 
-DEFINE_FUNCTION art_quick_ldivmod
-    subl LITERAL(12), %esp        // alignment padding
+DEFINE_FUNCTION art_quick_lmod
+    subl LITERAL(12), %esp       // alignment padding
     .cfi_adjust_cfa_offset 12
     PUSH ebx                     // pass arg4 b.hi
     PUSH edx                     // pass arg3 b.lo
     PUSH ecx                     // pass arg2 a.hi
-    PUSH eax                      // pass arg1 a.lo
-    call SYMBOL(artLdivmodFromCode) // (jlong a, jlong b)
-    addl LITERAL(28), %esp        // pop arguments
+    PUSH eax                     // pass arg1 a.lo
+    call SYMBOL(artLmod)         // (jlong a, jlong b)
+    addl LITERAL(28), %esp       // pop arguments
     .cfi_adjust_cfa_offset -28
     ret
-END_FUNCTION art_quick_ldivmod
+END_FUNCTION art_quick_lmod
 
 DEFINE_FUNCTION art_quick_lmul
     imul %eax, %ebx              // ebx = a.lo(eax) * b.hi(ebx)
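
The lock words manipulated by the lock/unlock stubs above appear to encode a thin lock as the owner's thread id in the low 16 bits plus a recursion count above them (hence the LITERAL(65536) steps), with a negative word standing for an inflated monitor. Under those assumptions, a C++ sketch of the lock fast path; the slow-path downcall and the cmpxchg retry loop are left out:

    #include <atomic>
    #include <cstdint>

    constexpr uint32_t kRecursionUnit = 1u << 16;  // one recursion-count step

    // Returns false when the stub would fall through to artLockObjectFromCode.
    bool ThinLockFastPath(std::atomic<int32_t>* lock_word, uint16_t thread_id) {
      int32_t cur = lock_word->load(std::memory_order_relaxed);
      if (cur < 0) {
        return false;                              // inflated monitor: slow path
      }
      if (cur == 0) {                              // unlocked: claim via CAS
        int32_t expected = 0;
        return lock_word->compare_exchange_strong(expected, thread_id,
                                                  std::memory_order_acquire);
        // On CAS failure the stub retries; one attempt keeps the sketch short.
      }
      if (static_cast<uint16_t>(cur) != thread_id) {
        return false;                              // thin lock held by another thread
      }
      uint32_t bumped = static_cast<uint32_t>(cur) + kRecursionUnit;
      if (bumped > static_cast<uint32_t>(INT32_MAX)) {
        return false;                              // count overflow would masquerade as a monitor
      }
      lock_word->store(static_cast<int32_t>(bumped),
                       std::memory_order_relaxed);  // we own it: no CAS needed
      return true;
    }

Unlock reverses the same steps: verify the low 16 bits match the calling thread, then either subtract kRecursionUnit or store zero.
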
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index dd3e7dd..42789cb 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -134,6 +134,8 @@
 
   // Sanity check other offsets.
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, OFFSETOF_MEMBER(Thread, card_table_));
+  CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }
 
 }  // namespace art
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index aca93a5..a6700bc 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -21,6 +21,17 @@
 // check.
 #define SUSPEND_CHECK_INTERVAL (1000)
 
+// Offsets within java.lang.Object.
+#define CLASS_OFFSET 0
+#define LOCK_WORD_OFFSET 4
+
+// Offsets within java.lang.Class.
+#define CLASS_COMPONENT_TYPE_OFFSET 12
+
+// Array offsets.
+#define ARRAY_LENGTH_OFFSET 8
+#define OBJECT_ARRAY_DATA_OFFSET 12
+
 // Offsets within java.lang.String.
 #define STRING_VALUE_OFFSET 8
 #define STRING_COUNT_OFFSET 12
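
These object and array offsets are consumed blindly by the assembly stubs, so they pin down the first 16 bytes of the mirror layouts. The Thread offsets get CHECK_EQ verification in InitCpu; for the object offsets, a compile-time restatement of the same contract against an illustrative layout:

    #include <cstddef>
    #include <cstdint>

    struct ArrayLayout {                 // illustrative, not the real mirror::Array
      uint32_t klass_;                   // CLASS_OFFSET == 0
      uint32_t lock_word_;               // LOCK_WORD_OFFSET == 4
      int32_t length_;                   // ARRAY_LENGTH_OFFSET == 8
      uint32_t first_element_;           // OBJECT_ARRAY_DATA_OFFSET == 12 (32-bit refs)
    };

    static_assert(offsetof(ArrayLayout, lock_word_) == 4, "LOCK_WORD_OFFSET");
    static_assert(offsetof(ArrayLayout, length_) == 8, "ARRAY_LENGTH_OFFSET");
    static_assert(offsetof(ArrayLayout, first_element_) == 12, "OBJECT_ARRAY_DATA_OFFSET");
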
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 7e8365e..c0cfee2 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -41,6 +41,54 @@
 }
 #endif  // ART_USE_FUTEXES
 
+#if defined(__APPLE__)
+
+// This works on Mac OS 10.6 but hasn't been tested on older releases.
+struct __attribute__((__may_alias__)) darwin_pthread_mutex_t {
+  long padding0;  // NOLINT(runtime/int) exact match to darwin type
+  int padding1;
+  uint32_t padding2;
+  int16_t padding3;
+  int16_t padding4;
+  uint32_t padding5;
+  pthread_t darwin_pthread_mutex_owner;
+  // ...other stuff we don't care about.
+};
+
+struct __attribute__((__may_alias__)) darwin_pthread_rwlock_t {
+  long padding0;  // NOLINT(runtime/int) exact match to darwin type
+  pthread_mutex_t padding1;
+  int padding2;
+  pthread_cond_t padding3;
+  pthread_cond_t padding4;
+  int padding5;
+  int padding6;
+  pthread_t darwin_pthread_rwlock_owner;
+  // ...other stuff we don't care about.
+};
+
+#endif  // __APPLE__
+
+#if defined(__GLIBC__)
+
+struct __attribute__((__may_alias__)) glibc_pthread_mutex_t {
+  int32_t padding0[2];
+  int owner;
+  // ...other stuff we don't care about.
+};
+
+struct __attribute__((__may_alias__)) glibc_pthread_rwlock_t {
+#ifdef __LP64__
+  int32_t padding0[6];
+#else
+  int32_t padding0[7];
+#endif
+  int writer;
+  // ...other stuff we don't care about.
+};
+
+#endif  // __GLIBC__
+
 class ScopedContentionRecorder {
  public:
   ScopedContentionRecorder(BaseMutex* mutex, uint64_t blocked_tid, uint64_t owner_tid)
@@ -185,6 +233,84 @@
 #endif
 }
 
+inline bool Mutex::IsExclusiveHeld(const Thread* self) const {
+  DCHECK(self == NULL || self == Thread::Current());
+  bool result = (GetExclusiveOwnerTid() == SafeGetTid(self));
+  if (kDebugLocking) {
+    // Sanity debug check that if we think it is locked we have it in our held mutexes.
+    if (result && self != NULL && level_ != kMonitorLock && !gAborting) {
+      CHECK_EQ(self->GetHeldMutex(level_), this);
+    }
+  }
+  return result;
+}
+
+inline uint64_t Mutex::GetExclusiveOwnerTid() const {
+#if ART_USE_FUTEXES
+  return exclusive_owner_;
+#elif defined(__BIONIC__)
+  return static_cast<uint64_t>((mutex_.value >> 16) & 0xffff);
+#elif defined(__GLIBC__)
+  return reinterpret_cast<const glibc_pthread_mutex_t*>(&mutex_)->owner;
+#elif defined(__APPLE__)
+  const darwin_pthread_mutex_t* dpmutex = reinterpret_cast<const darwin_pthread_mutex_t*>(&mutex_);
+  pthread_t owner = dpmutex->darwin_pthread_mutex_owner;
+  // 0 for unowned, -1 for PTHREAD_MTX_TID_SWITCHING
+  // TODO: should we make darwin_pthread_mutex_owner volatile and recheck until not -1?
+  if ((owner == (pthread_t)0) || (owner == (pthread_t)-1)) {
+    return 0;
+  }
+  uint64_t tid;
+  CHECK_PTHREAD_CALL(pthread_threadid_np, (owner, &tid), __FUNCTION__);  // Requires Mac OS 10.6
+  return tid;
+#else
+#error unsupported C library
+#endif
+}
+
+inline bool ReaderWriterMutex::IsExclusiveHeld(const Thread* self) const {
+  DCHECK(self == NULL || self == Thread::Current());
+  bool result = (GetExclusiveOwnerTid() == SafeGetTid(self));
+  if (kDebugLocking) {
+    // Sanity check that if the pthread thinks we own the lock, the Thread agrees.
+    if (self != NULL && result)  {
+      CHECK_EQ(self->GetHeldMutex(level_), this);
+    }
+  }
+  return result;
+}
+
+inline uint64_t ReaderWriterMutex::GetExclusiveOwnerTid() const {
+#if ART_USE_FUTEXES
+  int32_t state = state_;
+  if (state == 0) {
+    return 0;  // No owner.
+  } else if (state > 0) {
+    return -1;  // Shared.
+  } else {
+    return exclusive_owner_;
+  }
+#else
+#if defined(__BIONIC__)
+  return rwlock_.writerThreadId;
+#elif defined(__GLIBC__)
+  return reinterpret_cast<const glibc_pthread_rwlock_t*>(&rwlock_)->writer;
+#elif defined(__APPLE__)
+  const darwin_pthread_rwlock_t*
+      dprwlock = reinterpret_cast<const darwin_pthread_rwlock_t*>(&rwlock_);
+  pthread_t owner = dprwlock->darwin_pthread_rwlock_owner;
+  if (owner == (pthread_t)0) {
+    return 0;
+  }
+  uint64_t tid;
+  CHECK_PTHREAD_CALL(pthread_threadid_np, (owner, &tid), __FUNCTION__);  // Requires Mac OS 10.6
+  return tid;
+#else
+#error unsupported C library
+#endif
+#endif
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_MUTEX_INL_H_
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index b99e7c9..249f031 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -31,54 +31,6 @@
 
 namespace art {
 
-#if defined(__APPLE__)
-
-// This works on Mac OS 10.6 but hasn't been tested on older releases.
-struct __attribute__((__may_alias__)) darwin_pthread_mutex_t {
-  long padding0;  // NOLINT(runtime/int) exact match to darwin type
-  int padding1;
-  uint32_t padding2;
-  int16_t padding3;
-  int16_t padding4;
-  uint32_t padding5;
-  pthread_t darwin_pthread_mutex_owner;
-  // ...other stuff we don't care about.
-};
-
-struct __attribute__((__may_alias__)) darwin_pthread_rwlock_t {
-  long padding0;  // NOLINT(runtime/int) exact match to darwin type
-  pthread_mutex_t padding1;
-  int padding2;
-  pthread_cond_t padding3;
-  pthread_cond_t padding4;
-  int padding5;
-  int padding6;
-  pthread_t darwin_pthread_rwlock_owner;
-  // ...other stuff we don't care about.
-};
-
-#endif  // __APPLE__
-
-#if defined(__GLIBC__)
-
-struct __attribute__((__may_alias__)) glibc_pthread_mutex_t {
-  int32_t padding0[2];
-  int owner;
-  // ...other stuff we don't care about.
-};
-
-struct __attribute__((__may_alias__)) glibc_pthread_rwlock_t {
-#ifdef __LP64__
-  int32_t padding0[6];
-#else
-  int32_t padding0[7];
-#endif
-  int writer;
-  // ...other stuff we don't care about.
-};
-
-#endif  // __GLIBC__
-
 #if ART_USE_FUTEXES
 static bool ComputeRelativeTimeSpec(timespec* result_ts, const timespec& lhs, const timespec& rhs) {
   const int32_t one_sec = 1000 * 1000 * 1000;  // one second in nanoseconds.
@@ -102,17 +54,17 @@
   std::set<BaseMutex*>* all_mutexes;
   AllMutexData() : all_mutexes(NULL) {}
 };
-static struct AllMutexData all_mutex_data[kAllMutexDataSize];
+static struct AllMutexData gAllMutexData[kAllMutexDataSize];
 
 class ScopedAllMutexesLock {
  public:
   explicit ScopedAllMutexesLock(const BaseMutex* mutex) : mutex_(mutex) {
-    while (!all_mutex_data->all_mutexes_guard.compare_and_swap(0, reinterpret_cast<int32_t>(mutex))) {
+    while (!gAllMutexData->all_mutexes_guard.compare_and_swap(0, reinterpret_cast<int32_t>(mutex))) {
       NanoSleep(100);
     }
   }
   ~ScopedAllMutexesLock() {
-    while (!all_mutex_data->all_mutexes_guard.compare_and_swap(reinterpret_cast<int32_t>(mutex_), 0)) {
+    while (!gAllMutexData->all_mutexes_guard.compare_and_swap(reinterpret_cast<int32_t>(mutex_), 0)) {
       NanoSleep(100);
     }
   }
@@ -123,7 +75,7 @@
 BaseMutex::BaseMutex(const char* name, LockLevel level) : level_(level), name_(name) {
   if (kLogLockContentions) {
     ScopedAllMutexesLock mu(this);
-    std::set<BaseMutex*>** all_mutexes_ptr = &all_mutex_data->all_mutexes;
+    std::set<BaseMutex*>** all_mutexes_ptr = &gAllMutexData->all_mutexes;
     if (*all_mutexes_ptr == NULL) {
       // We leak the global set of all mutexes to avoid ordering issues in global variable
       // construction/destruction.
@@ -136,7 +88,7 @@
 BaseMutex::~BaseMutex() {
   if (kLogLockContentions) {
     ScopedAllMutexesLock mu(this);
-    all_mutex_data->all_mutexes->erase(this);
+    gAllMutexData->all_mutexes->erase(this);
   }
 }
 
@@ -144,13 +96,13 @@
   if (kLogLockContentions) {
     os << "Mutex logging:\n";
     ScopedAllMutexesLock mu(reinterpret_cast<const BaseMutex*>(-1));
-    std::set<BaseMutex*>* all_mutexes = all_mutex_data->all_mutexes;
+    std::set<BaseMutex*>* all_mutexes = gAllMutexData->all_mutexes;
     if (all_mutexes == NULL) {
      // No mutexes have been created yet at startup.
       return;
     }
     typedef std::set<BaseMutex*>::const_iterator It;
-    os << "(Contented)\n";
+    os << "(Contended)\n";
     for (It it = all_mutexes->begin(); it != all_mutexes->end(); ++it) {
       BaseMutex* mutex = *it;
       if (mutex->HasEverContended()) {
@@ -175,7 +127,8 @@
     return;
   }
   if (kDebugLocking) {
-    CHECK(self->GetHeldMutex(level_) == this) << "Waiting on unacquired mutex: " << name_;
+    CHECK(self->GetHeldMutex(level_) == this || level_ == kMonitorLock)
+        << "Waiting on unacquired mutex: " << name_;
     bool bad_mutexes_held = false;
     for (int i = kLockLevelCount - 1; i >= 0; --i) {
       if (i != level_) {
@@ -346,7 +299,7 @@
     bool done = false;
     do {
       int32_t cur_state = state_;
-      if (cur_state == 0) {
+      if (LIKELY(cur_state == 0)) {
         // Change state from 0 to 1.
         done = android_atomic_acquire_cas(0, 1, &state_) == 0;
       } else {
@@ -432,14 +385,14 @@
   bool done = false;
   do {
     int32_t cur_state = state_;
-    if (cur_state == 1) {
+    if (LIKELY(cur_state == 1)) {
       // We're no longer the owner.
       exclusive_owner_ = 0;
       // Change state to 0.
       done = android_atomic_release_cas(cur_state, 0, &state_) == 0;
-      if (done) {  // Spurious fail?
+      if (LIKELY(done)) {  // Spurious fail?
         // Wake a contender
-        if (num_contenders_ > 0) {
+        if (UNLIKELY(num_contenders_ > 0)) {
           futex(&state_, FUTEX_WAKE, 1, NULL, NULL, 0);
         }
       }
@@ -461,41 +414,6 @@
   }
 }
 
-bool Mutex::IsExclusiveHeld(const Thread* self) const {
-  DCHECK(self == NULL || self == Thread::Current());
-  bool result = (GetExclusiveOwnerTid() == SafeGetTid(self));
-  if (kDebugLocking) {
-    // Sanity debug check that if we think it is locked we have it in our held mutexes.
-    if (result && self != NULL && level_ != kMonitorLock && !gAborting) {
-      CHECK_EQ(self->GetHeldMutex(level_), this);
-    }
-  }
-  return result;
-}
-
-uint64_t Mutex::GetExclusiveOwnerTid() const {
-#if ART_USE_FUTEXES
-  return exclusive_owner_;
-#elif defined(__BIONIC__)
-  return static_cast<uint64_t>((mutex_.value >> 16) & 0xffff);
-#elif defined(__GLIBC__)
-  return reinterpret_cast<const glibc_pthread_mutex_t*>(&mutex_)->owner;
-#elif defined(__APPLE__)
-  const darwin_pthread_mutex_t* dpmutex = reinterpret_cast<const darwin_pthread_mutex_t*>(&mutex_);
-  pthread_t owner = dpmutex->darwin_pthread_mutex_owner;
-  // 0 for unowned, -1 for PTHREAD_MTX_TID_SWITCHING
-  // TODO: should we make darwin_pthread_mutex_owner volatile and recheck until not -1?
-  if ((owner == (pthread_t)0) || (owner == (pthread_t)-1)) {
-    return 0;
-  }
-  uint64_t tid;
-  CHECK_PTHREAD_CALL(pthread_threadid_np, (owner, &tid), __FUNCTION__);  // Requires Mac OS 10.6
-  return tid;
-#else
-#error unsupported C library
-#endif
-}
-
 void Mutex::Dump(std::ostream& os) const {
   os << (recursive_ ? "recursive " : "non-recursive ")
       << name_
@@ -549,7 +467,7 @@
   bool done = false;
   do {
     int32_t cur_state = state_;
-    if (cur_state == 0) {
+    if (LIKELY(cur_state == 0)) {
       // Change state from 0 to -1.
       done = android_atomic_acquire_cas(0, -1, &state_) == 0;
     } else {
@@ -583,14 +501,14 @@
   bool done = false;
   do {
     int32_t cur_state = state_;
-    if (cur_state == -1) {
+    if (LIKELY(cur_state == -1)) {
       // We're no longer the owner.
       exclusive_owner_ = 0;
       // Change state from -1 to 0.
       done = android_atomic_release_cas(-1, 0, &state_) == 0;
-      if (done) {  // cmpxchg may fail due to noise?
+      if (LIKELY(done)) {  // cmpxchg may fail due to noise?
         // Wake any waiters.
-        if (num_pending_readers_ > 0 || num_pending_writers_ > 0) {
+        if (UNLIKELY(num_pending_readers_ > 0 || num_pending_writers_ > 0)) {
           futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
         }
       }
@@ -687,18 +605,6 @@
   return true;
 }
 
-bool ReaderWriterMutex::IsExclusiveHeld(const Thread* self) const {
-  DCHECK(self == NULL || self == Thread::Current());
-  bool result = (GetExclusiveOwnerTid() == SafeGetTid(self));
-  if (kDebugLocking) {
-    // Sanity that if the pthread thinks we own the lock the Thread agrees.
-    if (self != NULL && result)  {
-      CHECK_EQ(self->GetHeldMutex(level_), this);
-    }
-  }
-  return result;
-}
-
 bool ReaderWriterMutex::IsSharedHeld(const Thread* self) const {
   DCHECK(self == NULL || self == Thread::Current());
   bool result;
@@ -710,37 +616,6 @@
   return result;
 }
 
-uint64_t ReaderWriterMutex::GetExclusiveOwnerTid() const {
-#if ART_USE_FUTEXES
-  int32_t state = state_;
-  if (state == 0) {
-    return 0;  // No owner.
-  } else if (state > 0) {
-    return -1;  // Shared.
-  } else {
-    return exclusive_owner_;
-  }
-#else
-#if defined(__BIONIC__)
-  return rwlock_.writerThreadId;
-#elif defined(__GLIBC__)
-  return reinterpret_cast<const glibc_pthread_rwlock_t*>(&rwlock_)->writer;
-#elif defined(__APPLE__)
-  const darwin_pthread_rwlock_t*
-      dprwlock = reinterpret_cast<const darwin_pthread_rwlock_t*>(&rwlock_);
-  pthread_t owner = dprwlock->darwin_pthread_rwlock_owner;
-  if (owner == (pthread_t)0) {
-    return 0;
-  }
-  uint64_t tid;
-  CHECK_PTHREAD_CALL(pthread_threadid_np, (owner, &tid), __FUNCTION__);  // Requires Mac OS 10.6
-  return tid;
-#else
-#error unsupported C library
-#endif
-#endif
-}
-
 void ReaderWriterMutex::Dump(std::ostream& os) const {
   os << name_
       << " level=" << static_cast<int>(level_)
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index ee37388..feb8a6c 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -58,7 +58,7 @@
 // futex.
 const bool kLogLockContentions = false;
 #endif
-const size_t kContentionLogSize = 64;
+const size_t kContentionLogSize = 4;
 const size_t kContentionLogDataSize = kLogLockContentions ? 1 : 0;
 const size_t kAllMutexDataSize = kLogLockContentions ? 1 : 0;
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index f072820..06cf9ff 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1094,13 +1094,14 @@
 // reinit references to when reinitializing a ClassLinker from a
 // mapped image.
 void ClassLinker::VisitRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty) {
-  visitor(class_roots_, arg);
+  class_roots_ = down_cast<mirror::ObjectArray<mirror::Class>*>(visitor(class_roots_, arg));
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, dex_lock_);
     if (!only_dirty || dex_caches_dirty_) {
-      for (mirror::DexCache* dex_cache : dex_caches_) {
-        visitor(dex_cache, arg);
+      for (mirror::DexCache*& dex_cache : dex_caches_) {
+        dex_cache = down_cast<mirror::DexCache*>(visitor(dex_cache, arg));
+        DCHECK(dex_cache != nullptr);
       }
       if (clean_dirty) {
         dex_caches_dirty_ = false;
@@ -1111,8 +1112,9 @@
   {
     ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
     if (!only_dirty || class_table_dirty_) {
-      for (const std::pair<size_t, mirror::Class*>& it : class_table_) {
-        visitor(it.second, arg);
+      for (std::pair<const size_t, mirror::Class*>& it : class_table_) {
+        it.second = down_cast<mirror::Class*>(visitor(it.second, arg));
+        DCHECK(it.second != nullptr);
       }
       if (clean_dirty) {
         class_table_dirty_ = false;
@@ -1123,7 +1125,8 @@
     // handle image roots by using the MS/CMS rescanning of dirty cards.
   }
 
-  visitor(array_iftable_, arg);
+  array_iftable_ = reinterpret_cast<mirror::IfTable*>(visitor(array_iftable_, arg));
+  DCHECK(array_iftable_ != nullptr);
 }
 
 void ClassLinker::VisitClasses(ClassVisitor* visitor, void* arg) {
@@ -1630,21 +1633,22 @@
 }
 
 void ClassLinker::FixupStaticTrampolines(mirror::Class* klass) {
-  ClassHelper kh(klass);
-  const DexFile::ClassDef* dex_class_def = kh.GetClassDef();
-  CHECK(dex_class_def != NULL);
-  const DexFile& dex_file = kh.GetDexFile();
-  const byte* class_data = dex_file.GetClassData(*dex_class_def);
-  if (class_data == NULL) {
-    return;  // no fields or methods - for example a marker interface
+  if (klass->NumDirectMethods() == 0) {
+    return;  // No direct methods => no static methods.
   }
   Runtime* runtime = Runtime::Current();
   if (!runtime->IsStarted() || runtime->UseCompileTimeClassPath()) {
-    // OAT file unavailable
-    return;
+    return;  // OAT file unavailable.
   }
+  ClassHelper kh(klass);
+  const DexFile& dex_file = kh.GetDexFile();
+  const DexFile::ClassDef* dex_class_def = kh.GetClassDef();
+  CHECK(dex_class_def != nullptr);
+  const byte* class_data = dex_file.GetClassData(*dex_class_def);
+  // There should always be class data if there were direct methods.
+  CHECK(class_data != nullptr) << PrettyDescriptor(klass);
   UniquePtr<const OatFile::OatClass> oat_class(GetOatClass(dex_file, klass->GetDexClassDefIndex()));
-  CHECK(oat_class.get() != NULL);
+  CHECK(oat_class.get() != nullptr);
   ClassDataItemIterator it(dex_file, class_data);
   // Skip fields
   while (it.HasNextStaticField()) {
@@ -1653,7 +1657,7 @@
   while (it.HasNextInstanceField()) {
     it.Next();
   }
-  // Link the code of methods skipped by LinkCode
+  // Link the code of methods skipped by LinkCode.
   for (size_t method_index = 0; it.HasNextDirectMethod(); ++method_index, it.Next()) {
     mirror::ArtMethod* method = klass->GetDirectMethod(method_index);
     if (!method->IsStatic()) {
@@ -1849,7 +1853,7 @@
                                            SirtRef<mirror::Class>& klass) {
   uint32_t dex_method_idx = it.GetMemberIndex();
   const DexFile::MethodId& method_id = dex_file.GetMethodId(dex_method_idx);
-  StringPiece method_name(dex_file.GetMethodName(method_id));
+  StringPiece method_name(dex_file.StringDataAsStringPieceByIdx(method_id.name_idx_));
 
   mirror::ArtMethod* dst = AllocArtMethod(self);
   if (UNLIKELY(dst == NULL)) {
@@ -1861,47 +1865,54 @@
   const char* old_cause = self->StartAssertNoThreadSuspension("LoadMethod");
   dst->SetDexMethodIndex(dex_method_idx);
   dst->SetDeclaringClass(klass.get());
-
-  if (method_name == "finalize") {
-    // Create the prototype for a signature of "()V"
-    const DexFile::StringId* void_string_id = dex_file.FindStringId("V");
-    if (void_string_id != NULL) {
-      const DexFile::TypeId* void_type_id =
-          dex_file.FindTypeId(dex_file.GetIndexForStringId(*void_string_id));
-      if (void_type_id != NULL) {
-        std::vector<uint16_t> no_args;
-        const DexFile::ProtoId* finalizer_proto =
-            dex_file.FindProtoId(dex_file.GetIndexForTypeId(*void_type_id), no_args);
-        if (finalizer_proto != NULL) {
-          // We have the prototype in the dex file
-          if (klass->GetClassLoader() != NULL) {  // All non-boot finalizer methods are flagged
-            klass->SetFinalizable();
-          } else {
-            ClassHelper kh(klass.get());
-            StringPiece klass_descriptor(kh.GetDescriptor());
-            // The Enum class declares a "final" finalize() method to prevent subclasses from
-            // introducing a finalizer. We don't want to set the finalizable flag for Enum or its
-            // subclasses, so we exclude it here.
-            // We also want to avoid setting the flag on Object, where we know that finalize() is
-            // empty.
-            if (klass_descriptor != "Ljava/lang/Object;" &&
-                klass_descriptor != "Ljava/lang/Enum;") {
-              klass->SetFinalizable();
-            }
-          }
-        }
-      }
-    }
-  }
   dst->SetCodeItemOffset(it.GetMethodCodeItemOffset());
-  dst->SetAccessFlags(it.GetMemberAccessFlags());
 
   dst->SetDexCacheStrings(klass->GetDexCache()->GetStrings());
   dst->SetDexCacheResolvedMethods(klass->GetDexCache()->GetResolvedMethods());
   dst->SetDexCacheResolvedTypes(klass->GetDexCache()->GetResolvedTypes());
   dst->SetDexCacheInitializedStaticStorage(klass->GetDexCache()->GetInitializedStaticStorage());
 
-  CHECK(dst->IsArtMethod());
+  uint32_t access_flags = it.GetMemberAccessFlags();
+
+  if (UNLIKELY(method_name == "finalize")) {
+    // Set finalizable flag on declaring class.
+    const DexFile::ProtoId& proto = dex_file.GetProtoId(method_id.proto_idx_);
+    if (dex_file.GetProtoParameters(proto) == NULL) {  // No arguments
+      const DexFile::TypeId& return_type = dex_file.GetTypeId(proto.return_type_idx_);
+      if (dex_file.StringDataAsStringPieceByIdx(return_type.descriptor_idx_) == "V") {
+        // Void return type.
+        if (klass->GetClassLoader() != NULL) {  // All non-boot finalizer methods are flagged
+          klass->SetFinalizable();
+        } else {
+          ClassHelper kh(klass.get());
+          StringPiece klass_descriptor(kh.GetDescriptorAsStringPiece());
+          // The Enum class declares a "final" finalize() method to prevent subclasses from
+          // introducing a finalizer. We don't want to set the finalizable flag for Enum or its
+          // subclasses, so we exclude it here.
+          // We also want to avoid setting the flag on Object, where we know that finalize() is
+          // empty.
+          if (klass_descriptor != "Ljava/lang/Object;" &&
+              klass_descriptor != "Ljava/lang/Enum;") {
+            klass->SetFinalizable();
+          }
+        }
+      }
+    }
+  } else if (method_name[0] == '<') {
+    // Fix broken access flags for initializers. Bug 11157540.
+    bool is_init = (method_name == "<init>");
+    bool is_clinit = !is_init && (method_name == "<clinit>");
+    if (UNLIKELY(!is_init && !is_clinit)) {
+      LOG(WARNING) << "Unexpected '<' at start of method name " << method_name;
+    } else {
+      if (UNLIKELY((access_flags & kAccConstructor) == 0)) {
+        LOG(WARNING) << method_name << " didn't have expected constructor access flag in class "
+            << PrettyDescriptor(klass.get()) << " in dex file " << dex_file.GetLocation();
+        access_flags |= kAccConstructor;
+      }
+    }
+  }
+  dst->SetAccessFlags(access_flags);
 
   self->EndAssertNoThreadSuspension(old_cause);
   return dst;
@@ -2229,7 +2240,7 @@
        ++it) {
     mirror::Class* klass = it->second;
     kh.ChangeClass(klass);
-    if (strcmp(kh.GetDescriptor(), descriptor) == 0 && klass->GetClassLoader() == class_loader) {
+    if (kh.GetDescriptorAsStringPiece() == descriptor && klass->GetClassLoader() == class_loader) {
       class_table_.erase(it);
       return true;
     }
@@ -2275,15 +2286,16 @@
   for (auto it = class_table_.lower_bound(hash); it != end && it->first == hash; ++it) {
     mirror::Class* klass = it->second;
     kh.ChangeClass(klass);
-    if (klass->GetClassLoader() == class_loader && strcmp(descriptor, kh.GetDescriptor()) == 0) {
+    if (klass->GetClassLoader() == class_loader && kh.GetDescriptorAsStringPiece() == descriptor) {
       if (kIsDebugBuild) {
         // Check for duplicates in the table.
         for (++it; it != end && it->first == hash; ++it) {
           mirror::Class* klass2 = it->second;
           kh.ChangeClass(klass2);
-          CHECK(!(strcmp(descriptor, kh.GetDescriptor()) == 0 && klass2->GetClassLoader() == class_loader))
-          << PrettyClass(klass) << " " << klass << " " << klass->GetClassLoader() << " "
-          << PrettyClass(klass2) << " " << klass2 << " " << klass2->GetClassLoader();
+          CHECK(!(kh.GetDescriptorAsStringPiece() == descriptor &&
+                  klass2->GetClassLoader() == class_loader))
+              << PrettyClass(klass) << " " << klass << " " << klass->GetClassLoader() << " "
+              << PrettyClass(klass2) << " " << klass2 << " " << klass2->GetClassLoader();
         }
       }
       return klass;
@@ -2389,7 +2401,7 @@
       it != end && it->first == hash; ++it) {
     mirror::Class* klass = it->second;
     kh.ChangeClass(klass);
-    if (strcmp(descriptor, kh.GetDescriptor()) == 0) {
+    if (kh.GetDescriptorAsStringPiece() == descriptor) {
       result.push_back(klass);
     }
   }
@@ -2550,11 +2562,11 @@
   const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_file.GetLocation(),
                                                                     &dex_location_checksum);
   CHECK(oat_dex_file != NULL) << dex_file.GetLocation() << " " << PrettyClass(klass);
-  const char* descriptor = ClassHelper(klass).GetDescriptor();
   uint16_t class_def_index = klass->GetDexClassDefIndex();
   UniquePtr<const OatFile::OatClass> oat_class(oat_dex_file->GetOatClass(class_def_index));
   CHECK(oat_class.get() != NULL)
-          << dex_file.GetLocation() << " " << PrettyClass(klass) << " " << descriptor;
+          << dex_file.GetLocation() << " " << PrettyClass(klass) << " "
+          << ClassHelper(klass).GetDescriptor();
   oat_file_class_status = oat_class->GetStatus();
   if (oat_file_class_status == mirror::Class::kStatusVerified ||
       oat_file_class_status == mirror::Class::kStatusInitialized) {
@@ -2593,7 +2605,8 @@
     return false;
   }
   LOG(FATAL) << "Unexpected class status: " << oat_file_class_status
-             << " " << dex_file.GetLocation() << " " << PrettyClass(klass) << " " << descriptor;
+             << " " << dex_file.GetLocation() << " " << PrettyClass(klass) << " "
+             << ClassHelper(klass).GetDescriptor();
 
   return false;
 }
@@ -2835,12 +2848,12 @@
   CHECK(constructor->IsConstructor());
   MethodHelper mh(constructor);
   CHECK_STREQ(mh.GetName(), "<init>");
-  CHECK_EQ(mh.GetSignature(), std::string("(Ljava/lang/reflect/InvocationHandler;)V"));
+  CHECK_STREQ(mh.GetSignature().ToString().c_str(), "(Ljava/lang/reflect/InvocationHandler;)V");
   DCHECK(constructor->IsPublic());
 }
 
 mirror::ArtMethod* ClassLinker::CreateProxyMethod(Thread* self, SirtRef<mirror::Class>& klass,
-                                                       SirtRef<mirror::ArtMethod>& prototype) {
+                                                  SirtRef<mirror::ArtMethod>& prototype) {
   // Ensure prototype is in dex cache so that we can use the dex cache to look up the overridden
   // prototype method
   prototype->GetDeclaringClass()->GetDexCache()->SetResolvedMethod(prototype->GetDexMethodIndex(),
@@ -2904,7 +2917,7 @@
   }
   if (!can_init_statics) {
     // Check if there's a class initializer.
-    mirror::ArtMethod* clinit = klass->FindDeclaredDirectMethod("<clinit>", "()V");
+    mirror::ArtMethod* clinit = klass->FindClassInitializer();
     if (clinit != NULL) {
       return false;
     }
@@ -3051,7 +3064,7 @@
     }
   }
 
-  mirror::ArtMethod* clinit = klass->FindDeclaredDirectMethod("<clinit>", "()V");
+  mirror::ArtMethod* clinit = klass->FindClassInitializer();
   if (clinit != NULL) {
     CHECK(can_init_statics);
     if (LIKELY(Runtime::Current()->IsStarted())) {
@@ -3741,10 +3754,10 @@
 
     // same basic group? then sort by string.
     fh_->ChangeField(field1);
-    StringPiece name1(fh_->GetName());
+    const char* name1 = fh_->GetName();
     fh_->ChangeField(field2);
-    StringPiece name2(fh_->GetName());
-    return name1 < name2;
+    const char* name2 = fh_->GetName();
+    return strcmp(name1, name2) < 0;
   }
 
   FieldHelper* fh_;
@@ -3778,7 +3791,9 @@
   // minimizes disruption of C++ version such as Class and Method.
   std::deque<mirror::ArtField*> grouped_and_sorted_fields;
   for (size_t i = 0; i < num_fields; i++) {
-    grouped_and_sorted_fields.push_back(fields->Get(i));
+    mirror::ArtField* f = fields->Get(i);
+    CHECK(f != NULL);
+    grouped_and_sorted_fields.push_back(f);
   }
   FieldHelper fh(NULL, this);
   std::sort(grouped_and_sorted_fields.begin(),
@@ -3845,7 +3860,7 @@
 
   // We lie to the GC about the java.lang.ref.Reference.referent field, so it doesn't scan it.
   if (!is_static &&
-      StringPiece(ClassHelper(klass.get(), this).GetDescriptor()) == "Ljava/lang/ref/Reference;") {
+      (ClassHelper(klass.get(), this).GetDescriptorAsStringPiece() == "Ljava/lang/ref/Reference;")) {
     // We know there are no non-reference fields in the Reference classes, and we know
     // that 'referent' is alphabetically last, so this is easy...
     CHECK_EQ(num_reference_fields, num_fields);
@@ -3854,39 +3869,39 @@
     --num_reference_fields;
   }
 
-#ifndef NDEBUG
-  // Make sure that all reference fields appear before
-  // non-reference fields, and all double-wide fields are aligned.
-  bool seen_non_ref = false;
-  for (size_t i = 0; i < num_fields; i++) {
-    mirror::ArtField* field = fields->Get(i);
-    if (false) {  // enable to debug field layout
-      LOG(INFO) << "LinkFields: " << (is_static ? "static" : "instance")
-                << " class=" << PrettyClass(klass.get())
-                << " field=" << PrettyField(field)
-                << " offset=" << field->GetField32(MemberOffset(mirror::ArtField::OffsetOffset()),
-                                                   false);
-    }
-    fh.ChangeField(field);
-    Primitive::Type type = fh.GetTypeAsPrimitiveType();
-    bool is_primitive = type != Primitive::kPrimNot;
-    if (StringPiece(ClassHelper(klass.get(), this).GetDescriptor()) == "Ljava/lang/ref/Reference;" &&
-        StringPiece(fh.GetName()) == "referent") {
-      is_primitive = true;  // We lied above, so we have to expect a lie here.
-    }
-    if (is_primitive) {
-      if (!seen_non_ref) {
-        seen_non_ref = true;
-        DCHECK_EQ(num_reference_fields, i);
+  if (kIsDebugBuild) {
+    // Make sure that all reference fields appear before
+    // non-reference fields, and all double-wide fields are aligned.
+    bool seen_non_ref = false;
+    for (size_t i = 0; i < num_fields; i++) {
+      mirror::ArtField* field = fields->Get(i);
+      if (false) {  // enable to debug field layout
+        LOG(INFO) << "LinkFields: " << (is_static ? "static" : "instance")
+                    << " class=" << PrettyClass(klass.get())
+                    << " field=" << PrettyField(field)
+                    << " offset=" << field->GetField32(MemberOffset(mirror::ArtField::OffsetOffset()),
+                                                       false);
       }
-    } else {
-      DCHECK(!seen_non_ref);
+      fh.ChangeField(field);
+      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      bool is_primitive = type != Primitive::kPrimNot;
+      if (ClassHelper(klass.get(), this).GetDescriptorAsStringPiece() == "Ljava/lang/ref/Reference;" &&
+          fh.GetNameAsStringPiece() == "referent") {
+        is_primitive = true;  // We lied above, so we have to expect a lie here.
+      }
+      if (is_primitive) {
+        if (!seen_non_ref) {
+          seen_non_ref = true;
+          DCHECK_EQ(num_reference_fields, i);
+        }
+      } else {
+        DCHECK(!seen_non_ref);
+      }
+    }
+    if (!seen_non_ref) {
+      DCHECK_EQ(num_fields, num_reference_fields);
     }
   }
-  if (!seen_non_ref) {
-    DCHECK_EQ(num_fields, num_reference_fields);
-  }
-#endif
   size = field_offset.Uint32Value();
   // Update klass
   if (is_static) {
@@ -4002,11 +4017,11 @@
 }
 
 mirror::ArtMethod* ClassLinker::ResolveMethod(const DexFile& dex_file,
-                                                   uint32_t method_idx,
-                                                   mirror::DexCache* dex_cache,
-                                                   mirror::ClassLoader* class_loader,
-                                                   const mirror::ArtMethod* referrer,
-                                                   InvokeType type) {
+                                              uint32_t method_idx,
+                                              mirror::DexCache* dex_cache,
+                                              mirror::ClassLoader* class_loader,
+                                              const mirror::ArtMethod* referrer,
+                                              InvokeType type) {
   DCHECK(dex_cache != NULL);
   // Check for hit in the dex cache.
   mirror::ArtMethod* resolved = dex_cache->GetResolvedMethod(method_idx);
@@ -4041,7 +4056,7 @@
   if (resolved == NULL) {
     // Search by name, which works across dex files.
     const char* name = dex_file.StringDataByIdx(method_id.name_idx_);
-    std::string signature(dex_file.CreateMethodSignature(method_id.proto_idx_, NULL));
+    const Signature signature = dex_file.GetMethodSignature(method_id);
     switch (type) {
       case kDirect:  // Fall-through.
       case kStatic:
@@ -4071,7 +4086,7 @@
     // We failed to find the method which means either an access error, an incompatible class
     // change, or no such method. First try to find the method among direct and virtual methods.
     const char* name = dex_file.StringDataByIdx(method_id.name_idx_);
-    std::string signature(dex_file.CreateMethodSignature(method_id.proto_idx_, NULL));
+    const Signature signature = dex_file.GetMethodSignature(method_id);
     switch (type) {
       case kDirect:
       case kStatic:
@@ -4189,9 +4204,9 @@
 }
 
 mirror::ArtField* ClassLinker::ResolveFieldJLS(const DexFile& dex_file,
-                                            uint32_t field_idx,
-                                            mirror::DexCache* dex_cache,
-                                            mirror::ClassLoader* class_loader) {
+                                               uint32_t field_idx,
+                                               mirror::DexCache* dex_cache,
+                                               mirror::ClassLoader* class_loader) {
   DCHECK(dex_cache != NULL);
   mirror::ArtField* resolved = dex_cache->GetResolvedField(field_idx);
   if (resolved != NULL) {
@@ -4204,8 +4219,9 @@
     return NULL;
   }
 
-  const char* name = dex_file.GetFieldName(field_id);
-  const char* type = dex_file.GetFieldTypeDescriptor(field_id);
+  StringPiece name(dex_file.StringDataAsStringPieceByIdx(field_id.name_idx_));
+  StringPiece type(dex_file.StringDataAsStringPieceByIdx(
+      dex_file.GetTypeId(field_id.type_idx_).descriptor_idx_));
   resolved = klass->FindField(name, type);
   if (resolved != NULL) {
     dex_cache->SetResolvedField(field_idx, resolved);
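
A note on the hunk above: FindField is now called with StringPieces built directly over the mapped dex data, so no std::string is materialized for the name/type key. A hedged sketch of the pattern (the helper below is hypothetical):

    // Hypothetical helper showing the zero-copy key construction.
    void LogFieldKey(const DexFile& dex_file, const DexFile::FieldId& field_id) {
      StringPiece name(dex_file.StringDataAsStringPieceByIdx(field_id.name_idx_));
      StringPiece type(dex_file.StringDataAsStringPieceByIdx(
          dex_file.GetTypeId(field_id.type_idx_).descriptor_idx_));
      LOG(INFO) << "field " << name << ":" << type;  // both are views into mapped dex data
    }
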
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index bbc2877..029b73e 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -152,7 +152,7 @@
     EXPECT_TRUE(method != NULL);
     EXPECT_TRUE(method->GetClass() != NULL);
     EXPECT_TRUE(mh.GetName() != NULL);
-    EXPECT_TRUE(mh.GetSignature() != NULL);
+    EXPECT_TRUE(mh.GetSignature() != Signature::NoSignature());
 
     EXPECT_TRUE(method->GetDexCacheStrings() != NULL);
     EXPECT_TRUE(method->GetDexCacheResolvedMethods() != NULL);
@@ -340,8 +340,9 @@
     }
   }
 
-  static void TestRootVisitor(const mirror::Object* root, void*) {
+  static mirror::Object* TestRootVisitor(mirror::Object* root, void*) {
     EXPECT_TRUE(root != NULL);
+    return root;
   }
 };
 
@@ -941,15 +942,16 @@
   EXPECT_TRUE(K->IsAssignableFrom(B));
   EXPECT_TRUE(J->IsAssignableFrom(B));
 
-  mirror::ArtMethod* Ii = I->FindVirtualMethod("i", "()V");
-  mirror::ArtMethod* Jj1 = J->FindVirtualMethod("j1", "()V");
-  mirror::ArtMethod* Jj2 = J->FindVirtualMethod("j2", "()V");
-  mirror::ArtMethod* Kj1 = K->FindInterfaceMethod("j1", "()V");
-  mirror::ArtMethod* Kj2 = K->FindInterfaceMethod("j2", "()V");
-  mirror::ArtMethod* Kk = K->FindInterfaceMethod("k", "()V");
-  mirror::ArtMethod* Ai = A->FindVirtualMethod("i", "()V");
-  mirror::ArtMethod* Aj1 = A->FindVirtualMethod("j1", "()V");
-  mirror::ArtMethod* Aj2 = A->FindVirtualMethod("j2", "()V");
+  const Signature void_sig = I->GetDexCache()->GetDexFile()->CreateSignature("()V");
+  mirror::ArtMethod* Ii = I->FindVirtualMethod("i", void_sig);
+  mirror::ArtMethod* Jj1 = J->FindVirtualMethod("j1", void_sig);
+  mirror::ArtMethod* Jj2 = J->FindVirtualMethod("j2", void_sig);
+  mirror::ArtMethod* Kj1 = K->FindInterfaceMethod("j1", void_sig);
+  mirror::ArtMethod* Kj2 = K->FindInterfaceMethod("j2", void_sig);
+  mirror::ArtMethod* Kk = K->FindInterfaceMethod("k", void_sig);
+  mirror::ArtMethod* Ai = A->FindVirtualMethod("i", void_sig);
+  mirror::ArtMethod* Aj1 = A->FindVirtualMethod("j1", void_sig);
+  mirror::ArtMethod* Aj2 = A->FindVirtualMethod("j2", void_sig);
   ASSERT_TRUE(Ii != NULL);
   ASSERT_TRUE(Jj1 != NULL);
   ASSERT_TRUE(Jj2 != NULL);
@@ -994,7 +996,7 @@
   CHECK(dex_file != NULL);
 
   mirror::Class* klass = class_linker_->FindClass("LStaticsFromCode;", class_loader.get());
-  mirror::ArtMethod* clinit = klass->FindDirectMethod("<clinit>", "()V");
+  mirror::ArtMethod* clinit = klass->FindClassInitializer();
   mirror::ArtMethod* getS0 = klass->FindDirectMethod("getS0", "()Ljava/lang/Object;");
   const DexFile::StringId* string_id = dex_file->FindStringId("LStaticsFromCode;");
   ASSERT_TRUE(string_id != NULL);
diff --git a/runtime/common_test.h b/runtime/common_test.h
index dc1f592..fe54d03 100644
--- a/runtime/common_test.h
+++ b/runtime/common_test.h
@@ -286,12 +286,7 @@
     if (java_lang_dex_file_ == NULL) {
       LOG(FATAL) << "Could not open .dex file '" << GetLibCoreDexFileName() << "'\n";
     }
-    conscrypt_file_ = DexFile::Open(GetConscryptFileName(), GetConscryptFileName());
-    if (conscrypt_file_  == NULL) {
-      LOG(FATAL) << "Could not open .dex file '" << GetConscryptFileName() << "'\n";
-    }
     boot_class_path_.push_back(java_lang_dex_file_);
-    boot_class_path_.push_back(conscrypt_file_);
 
     std::string min_heap_string(StringPrintf("-Xms%zdm", gc::Heap::kDefaultInitialSize / MB));
     std::string max_heap_string(StringPrintf("-Xmx%zdm", gc::Heap::kDefaultMaximumSize / MB));
@@ -398,10 +393,6 @@
     return GetDexFileName("core-libart");
   }
 
-  std::string GetConscryptFileName() {
-    return GetDexFileName("conscrypt");
-  }
-
   std::string GetDexFileName(const std::string& jar_prefix) {
     if (IsHost()) {
       const char* host_dir = getenv("ANDROID_HOST_OUT");
@@ -520,7 +511,6 @@
   std::string android_data_;
   std::string dalvik_cache_;
   const DexFile* java_lang_dex_file_;  // owned by runtime_
-  const DexFile* conscrypt_file_;  // owned by runtime_
   std::vector<const DexFile*> boot_class_path_;
   UniquePtr<Runtime> runtime_;
   // Owned by the runtime
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 26ce5be..189e3ed 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -265,7 +265,7 @@
 // NoSuchMethodError
 
 void ThrowNoSuchMethodError(InvokeType type, mirror::Class* c, const StringPiece& name,
-                            const StringPiece& signature) {
+                            const Signature& signature) {
   std::ostringstream msg;
   ClassHelper kh(c);
   msg << "No " << type << " method " << name << signature
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index 99c6343..1d77e2d 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -27,6 +27,7 @@
 class Class;
 class Object;
 }  // namespace mirror
+class Signature;
 class StringPiece;
 class ThrowLocation;
 
@@ -140,7 +141,7 @@
 // NoSuchMethodError
 
 void ThrowNoSuchMethodError(InvokeType type, mirror::Class* c, const StringPiece& name,
-                            const StringPiece& signature)
+                            const Signature& signature)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 void ThrowNoSuchMethodError(uint32_t method_idx)
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 88269e5..57bd57e 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -674,15 +674,15 @@
   Locks::mutator_lock_->ExclusiveUnlock(self);
   Locks::mutator_lock_->SharedLock(self);
 
-  if (monitor_info.owner != NULL) {
-    expandBufAddObjectId(reply, gRegistry->Add(monitor_info.owner->GetPeer()));
+  if (monitor_info.owner_ != NULL) {
+    expandBufAddObjectId(reply, gRegistry->Add(monitor_info.owner_->GetPeer()));
   } else {
     expandBufAddObjectId(reply, gRegistry->Add(NULL));
   }
-  expandBufAdd4BE(reply, monitor_info.entry_count);
-  expandBufAdd4BE(reply, monitor_info.waiters.size());
-  for (size_t i = 0; i < monitor_info.waiters.size(); ++i) {
-    expandBufAddObjectId(reply, gRegistry->Add(monitor_info.waiters[i]->GetPeer()));
+  expandBufAdd4BE(reply, monitor_info.entry_count_);
+  expandBufAdd4BE(reply, monitor_info.waiters_.size());
+  for (size_t i = 0; i < monitor_info.waiters_.size(); ++i) {
+    expandBufAddObjectId(reply, gRegistry->Add(monitor_info.waiters_[i]->GetPeer()));
   }
   return JDWP::ERR_NONE;
 }
@@ -891,7 +891,7 @@
   }
 
   if (pDescriptor != NULL) {
-    *pDescriptor = ClassHelper(c).GetDescriptor();
+    *pDescriptor = ClassHelper(c).GetDescriptorAsStringPiece().as_string();
   }
   return JDWP::ERR_NONE;
 }
@@ -928,13 +928,13 @@
   return JDWP::ERR_NONE;
 }
 
-JDWP::JdwpError Dbg::GetSignature(JDWP::RefTypeId class_id, std::string& signature) {
+JDWP::JdwpError Dbg::GetSignature(JDWP::RefTypeId class_id, std::string* signature) {
   JDWP::JdwpError status;
   mirror::Class* c = DecodeClass(class_id, status);
   if (c == NULL) {
     return status;
   }
-  signature = ClassHelper(c).GetDescriptor();
+  *signature = ClassHelper(c).GetDescriptorAsStringPiece().as_string();
   return JDWP::ERR_NONE;
 }
 
@@ -1065,8 +1065,8 @@
     LOG(WARNING) << __FUNCTION__ << " access out of bounds: offset=" << offset << "; count=" << count;
     return JDWP::ERR_INVALID_LENGTH;
   }
-  std::string descriptor(ClassHelper(dst->GetClass()).GetDescriptor());
-  JDWP::JdwpTag tag = BasicTagFromDescriptor(descriptor.c_str() + 1);
+  const char* descriptor = ClassHelper(dst->GetClass()).GetDescriptor();
+  JDWP::JdwpTag tag = BasicTagFromDescriptor(descriptor + 1);
 
   if (IsPrimitiveTag(tag)) {
     size_t width = GetTagWidth(tag);
@@ -1287,7 +1287,7 @@
     MethodHelper mh(m);
     expandBufAddMethodId(pReply, ToMethodId(m));
     expandBufAddUtf8String(pReply, mh.GetName());
-    expandBufAddUtf8String(pReply, mh.GetSignature());
+    expandBufAddUtf8String(pReply, mh.GetSignature().ToString());
     if (with_generic) {
       static const char genericSignature[1] = "";
       expandBufAddUtf8String(pReply, genericSignature);
@@ -1935,7 +1935,8 @@
   }
   // Suspend thread to build stack trace.
   bool timed_out;
-  Thread* thread = Thread::SuspendForDebugger(peer.get(), request_suspension, &timed_out);
+  Thread* thread = ThreadList::SuspendThreadByPeer(peer.get(), request_suspension, true,
+                                                   &timed_out);
   if (thread != NULL) {
     return JDWP::ERR_NONE;
   } else if (timed_out) {
@@ -2287,7 +2288,8 @@
   // since the class may not yet be verified.
   int state = JDWP::CS_VERIFIED | JDWP::CS_PREPARED;
   JDWP::JdwpTypeTag tag = c->IsInterface() ? JDWP::TT_INTERFACE : JDWP::TT_CLASS;
-  gJdwpState->PostClassPrepare(tag, gRegistry->Add(c), ClassHelper(c).GetDescriptor(), state);
+  gJdwpState->PostClassPrepare(tag, gRegistry->Add(c),
+                               ClassHelper(c).GetDescriptorAsStringPiece().as_string(), state);
 }
 
 void Dbg::UpdateDebugger(Thread* thread, mirror::Object* this_object,
@@ -2411,7 +2413,8 @@
         soa.Self()->TransitionFromRunnableToSuspended(kWaitingForDebuggerSuspension);
         jobject thread_peer = gRegistry->GetJObject(thread_id);
         bool timed_out;
-        Thread* suspended_thread = Thread::SuspendForDebugger(thread_peer, true, &timed_out);
+        Thread* suspended_thread = ThreadList::SuspendThreadByPeer(thread_peer, true, true,
+                                                                   &timed_out);
         CHECK_EQ(soa.Self()->TransitionFromSuspendedToRunnable(), kWaitingForDebuggerSuspension);
         if (suspended_thread == NULL) {
           // Thread terminated from under us while suspending.
@@ -3011,7 +3014,7 @@
 
   if (type == CHUNK_TYPE("THDE")) {
     uint8_t buf[4];
-    JDWP::Set4BE(&buf[0], t->GetThinLockId());
+    JDWP::Set4BE(&buf[0], t->GetThreadId());
     Dbg::DdmSendChunk(CHUNK_TYPE("THDE"), 4, buf);
   } else {
     CHECK(type == CHUNK_TYPE("THCR") || type == CHUNK_TYPE("THNM")) << type;
@@ -3021,7 +3024,7 @@
     const jchar* chars = (name.get() != NULL) ? name->GetCharArray()->GetData() : NULL;
 
     std::vector<uint8_t> bytes;
-    JDWP::Append4BE(bytes, t->GetThinLockId());
+    JDWP::Append4BE(bytes, t->GetThreadId());
     JDWP::AppendUtf16BE(bytes, chars, char_count);
     CHECK_EQ(bytes.size(), char_count*2 + sizeof(uint32_t)*2);
     Dbg::DdmSendChunk(type, bytes);
@@ -3486,7 +3489,9 @@
       recent_allocation_records_ = new AllocRecord[gAllocRecordMax];
       CHECK(recent_allocation_records_ != NULL);
     }
+    Runtime::Current()->InstrumentQuickAllocEntryPoints();
   } else {
+    Runtime::Current()->UninstrumentQuickAllocEntryPoints();
     delete[] recent_allocation_records_;
     recent_allocation_records_ = NULL;
   }
@@ -3542,7 +3547,7 @@
   AllocRecord* record = &recent_allocation_records_[gAllocRecordHead];
   record->type = type;
   record->byte_count = byte_count;
-  record->thin_lock_id = self->GetThinLockId();
+  record->thin_lock_id = self->GetThreadId();
 
   // Fill in the stack trace.
   AllocRecordStackVisitor visitor(self, record);
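
Side note on the Dbg::GetSignature change above: the output parameter moves from a non-const reference to a pointer, which makes mutation explicit at the call site. Assumed call-site shape:

    std::string signature;
    JDWP::JdwpError error = Dbg::GetSignature(class_id, &signature);  // '&' flags the out-param
    if (error == JDWP::ERR_NONE) {
      expandBufAddUtf8String(pReply, signature);
    }
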
diff --git a/runtime/debugger.h b/runtime/debugger.h
index d0fe445..8574a33 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -149,7 +149,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static JDWP::JdwpError GetReferenceType(JDWP::ObjectId object_id, JDWP::ExpandBuf* pReply)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static JDWP::JdwpError GetSignature(JDWP::RefTypeId ref_type_id, std::string& signature)
+  static JDWP::JdwpError GetSignature(JDWP::RefTypeId ref_type_id, std::string* signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static JDWP::JdwpError GetSourceFile(JDWP::RefTypeId ref_type_id, std::string& source_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/dex_file-inl.h b/runtime/dex_file-inl.h
index dee8026..c57a1e7 100644
--- a/runtime/dex_file-inl.h
+++ b/runtime/dex_file-inl.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_DEX_FILE_INL_H_
 
 #include "base/logging.h"
+#include "base/stringpiece.h"
 #include "dex_file.h"
 #include "leb128.h"
 #include "utils.h"
@@ -36,6 +37,20 @@
   return reinterpret_cast<const char*>(ptr);
 }
 
+inline StringPiece DexFile::StringDataAsStringPieceByIdx(uint32_t idx) const {
+  if (idx == kDexNoIndex) {
+    return StringPiece();
+  }
+  const StringId& string_id = GetStringId(idx);
+  uint32_t length;
+  const char* data = GetStringDataAndLength(string_id, &length);
+  return StringPiece(data, static_cast<int>(length));
+}
+
+inline const Signature DexFile::GetMethodSignature(const MethodId& method_id) const {
+  return Signature(this, GetProtoId(method_id.proto_idx_));
+}
+
 inline const DexFile::TryItem* DexFile::GetTryItems(const CodeItem& code_item, uint32_t offset) {
   const uint16_t* insns_end_ = &code_item.insns_[code_item.insns_size_in_code_units_];
   return reinterpret_cast<const TryItem*>
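
One subtlety in StringDataAsStringPieceByIdx() above: the length handed to StringPiece is the UTF-16 length recorded in the dex string header, which equals the byte length only for ASCII payloads; descriptors, shorties, and signatures are ASCII, which appears to be the operative assumption. Usage sketch:

    // Compare two dex strings without copying (assumes ASCII data, where the
    // recorded utf16_size matches the byte length StringPiece receives).
    bool SameDexString(const DexFile& a, uint32_t a_idx, const DexFile& b, uint32_t b_idx) {
      return a.StringDataAsStringPieceByIdx(a_idx) == b.StringDataAsStringPieceByIdx(b_idx);
    }
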
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 9034628..ac133a3 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -38,7 +38,7 @@
 #include "safe_map.h"
 #include "thread.h"
 #include "UniquePtr.h"
-#include "utf.h"
+#include "utf-inl.h"
 #include "utils.h"
 #include "well_known_classes.h"
 #include "zip_archive.h"
@@ -528,8 +528,8 @@
 }
 
 // Given a signature, place the type ids into the given vector
-bool DexFile::CreateTypeList(uint16_t* return_type_idx, std::vector<uint16_t>* param_type_idxs,
-                             const std::string& signature) const {
+bool DexFile::CreateTypeList(const StringPiece& signature, uint16_t* return_type_idx,
+                             std::vector<uint16_t>* param_type_idxs) const {
   if (signature[0] != '(') {
     return false;
   }
@@ -543,6 +543,7 @@
       process_return = true;
       continue;
     }
+    // TODO: avoid building a string.
     std::string descriptor;
     descriptor += c;
     while (c == '[') {  // process array prefix
@@ -582,35 +583,18 @@
   return false;  // failed to correctly parse return type
 }
 
-// Materializes the method descriptor for a method prototype.  Method
-// descriptors are not stored directly in the dex file.  Instead, one
-// must assemble the descriptor from references in the prototype.
-std::string DexFile::CreateMethodSignature(uint32_t proto_idx, int32_t* unicode_length) const {
-  const ProtoId& proto_id = GetProtoId(proto_idx);
-  std::string descriptor;
-  descriptor.push_back('(');
-  const TypeList* type_list = GetProtoParameters(proto_id);
-  size_t parameter_length = 0;
-  if (type_list != NULL) {
-    // A non-zero number of arguments.  Append the type names.
-    for (size_t i = 0; i < type_list->Size(); ++i) {
-      const TypeItem& type_item = type_list->GetTypeItem(i);
-      uint32_t type_idx = type_item.type_idx_;
-      uint32_t type_length;
-      const char* name = StringByTypeIdx(type_idx, &type_length);
-      parameter_length += type_length;
-      descriptor.append(name);
-    }
+const Signature DexFile::CreateSignature(const StringPiece& signature) const {
+  uint16_t return_type_idx;
+  std::vector<uint16_t> param_type_indices;
+  bool success = CreateTypeList(signature, &return_type_idx, &param_type_indices);
+  if (!success) {
+    return Signature::NoSignature();
   }
-  descriptor.push_back(')');
-  uint32_t return_type_idx = proto_id.return_type_idx_;
-  uint32_t return_type_length;
-  const char* name = StringByTypeIdx(return_type_idx, &return_type_length);
-  descriptor.append(name);
-  if (unicode_length != NULL) {
-    *unicode_length = parameter_length + return_type_length + 2;  // 2 for ( and )
+  const ProtoId* proto_id = FindProtoId(return_type_idx, param_type_indices);
+  if (proto_id == NULL) {
+    return Signature::NoSignature();
   }
-  return descriptor;
+  return Signature(this, *proto_id);
 }
 
 int32_t DexFile::GetLineNumFromPC(const mirror::ArtMethod* method, uint32_t rel_pc) const {
@@ -856,6 +840,30 @@
   }
 }
 
+std::string Signature::ToString() const {
+  if (dex_file_ == nullptr) {
+    CHECK(proto_id_ == nullptr);
+    return "<no signature>";
+  }
+  const DexFile::TypeList* params = dex_file_->GetProtoParameters(*proto_id_);
+  std::string result;
+  if (params == nullptr) {
+    result += "()";
+  } else {
+    result += "(";
+    for (uint32_t i = 0; i < params->Size(); ++i) {
+      result += dex_file_->StringByTypeIdx(params->GetTypeItem(i).type_idx_);
+    }
+    result += ")";
+  }
+  result += dex_file_->StringByTypeIdx(proto_id_->return_type_idx_);
+  return result;
+}
+
+std::ostream& operator<<(std::ostream& os, const Signature& sig) {
+  return os << sig.ToString();
+}
+
 // Decodes the header section from the class data bytes.
 void ClassDataItemIterator::ReadClassDataHeader() {
   CHECK(ptr_pos_ != NULL);
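
Behavioral note on CreateSignature() above: unlike the removed CreateMethodSignature(), it performs a lookup rather than string synthesis, so a well-formed signature that matches no proto in this dex file comes back as NoSignature. Sketch:

    // Lookup, not synthesis: succeeds only if this dex file contains a matching proto_id.
    const Signature sig = dex_file.CreateSignature("()V");
    if (sig == Signature::NoSignature()) {
      // "()V" parsed fine, but no proto in this particular dex file matches it.
    }
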
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 346154c..7f92f49 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -40,6 +40,8 @@
   class DexCache;
 }  // namespace mirror
 class ClassLinker;
+class Signature;
+class StringPiece;
 class ZipArchive;
 
 // TODO: move all of the macro functionality into the DexCache class.
@@ -432,6 +434,8 @@
     return GetStringDataAndLength(string_id, unicode_length);
   }
 
+  StringPiece StringDataAsStringPieceByIdx(uint32_t idx) const;
+
   const char* StringDataByIdx(uint32_t idx) const {
     uint32_t unicode_length;
     return StringDataAndLengthByIdx(idx, &unicode_length);
@@ -556,10 +560,8 @@
     return GetProtoId(method_id.proto_idx_);
   }
 
-  // Returns the signature of a method id.
-  const std::string GetMethodSignature(const MethodId& method_id) const {
-    return CreateMethodSignature(method_id.proto_idx_, NULL);
-  }
+  // Returns a representation of the signature of a method id.
+  const Signature GetMethodSignature(const MethodId& method_id) const;
 
   // Returns the name of a method id.
   const char* GetMethodName(const MethodId& method_id) const {
@@ -653,15 +655,16 @@
   }
 
   // Looks up a proto id for a given return type and signature type list
-  const ProtoId* FindProtoId(uint16_t return_type_id,
+  const ProtoId* FindProtoId(uint16_t return_type_idx,
                              const std::vector<uint16_t>& signature_type_idxs_) const;
 
   // Given a signature, place the type ids into the given vector; returns true on success
-  bool CreateTypeList(uint16_t* return_type_idx, std::vector<uint16_t>* param_type_idxs,
-                      const std::string& signature) const;
+  bool CreateTypeList(const StringPiece& signature, uint16_t* return_type_idx,
+                      std::vector<uint16_t>* param_type_idxs) const;
 
-  // Given a proto_idx decode the type list and return type into a method signature
-  std::string CreateMethodSignature(uint32_t proto_idx, int32_t* unicode_length) const;
+  // Create a Signature from the given string signature, or return Signature::NoSignature() if the
+  // string cannot be parsed or this dex file contains no matching proto.
+  const Signature CreateSignature(const StringPiece& signature) const;
 
   // Returns the short form method descriptor for the given prototype.
   const char* GetShorty(uint32_t proto_idx) const {
@@ -938,6 +941,83 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(DexFileParameterIterator);
 };
 
+// Abstracts the signature of a method.
+class Signature {
+ public:
+  std::string ToString() const;
+
+  static Signature NoSignature() {
+    return Signature();
+  }
+
+  bool operator==(const Signature& rhs) const {
+    if (dex_file_ == nullptr) {
+      return rhs.dex_file_ == nullptr;
+    }
+    if (rhs.dex_file_ == nullptr) {
+      return false;
+    }
+    if (dex_file_ == rhs.dex_file_) {
+      return proto_id_ == rhs.proto_id_;
+    }
+    StringPiece shorty(dex_file_->StringDataAsStringPieceByIdx(proto_id_->shorty_idx_));
+    if (shorty != rhs.dex_file_->StringDataAsStringPieceByIdx(rhs.proto_id_->shorty_idx_)) {
+      return false;  // Shorty mismatch.
+    }
+    if (shorty[0] == 'L') {
+      const DexFile::TypeId& return_type_id = dex_file_->GetTypeId(proto_id_->return_type_idx_);
+      const DexFile::TypeId& rhs_return_type_id =
+          rhs.dex_file_->GetTypeId(rhs.proto_id_->return_type_idx_);
+      if (dex_file_->StringDataAsStringPieceByIdx(return_type_id.descriptor_idx_) !=
+          rhs.dex_file_->StringDataAsStringPieceByIdx(rhs_return_type_id.descriptor_idx_)) {
+        return false;  // Return type mismatch.
+      }
+    }
+    if (shorty.find('L', 1) != StringPiece::npos) {
+      const DexFile::TypeList* params = dex_file_->GetProtoParameters(*proto_id_);
+      const DexFile::TypeList* rhs_params = rhs.dex_file_->GetProtoParameters(*rhs.proto_id_);
+      // Both lists are empty or have contents, or else shorty is broken.
+      DCHECK_EQ(params == nullptr, rhs_params == nullptr);
+      if (params != nullptr) {
+        uint32_t params_size = params->Size();
+        DCHECK_EQ(params_size, rhs_params->Size());  // Parameter list size must match.
+        for (uint32_t i = 0; i < params_size; ++i) {
+          const DexFile::TypeId& param_id = dex_file_->GetTypeId(params->GetTypeItem(i).type_idx_);
+          const DexFile::TypeId& rhs_param_id =
+              rhs.dex_file_->GetTypeId(rhs_params->GetTypeItem(i).type_idx_);
+          if (dex_file_->StringDataAsStringPieceByIdx(param_id.descriptor_idx_) !=
+              rhs.dex_file_->StringDataAsStringPieceByIdx(rhs_param_id.descriptor_idx_)) {
+            return false;  // Parameter type mismatch.
+          }
+        }
+      }
+    }
+    return true;
+  }
+
+  bool operator!=(const Signature& rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator==(const StringPiece& rhs) const {
+    // TODO: Avoid temporary string allocation.
+    return ToString() == rhs;
+  }
+
+ private:
+  Signature(const DexFile* dex, const DexFile::ProtoId& proto) : dex_file_(dex), proto_id_(&proto) {
+  }
+
+  Signature() : dex_file_(nullptr), proto_id_(nullptr) {
+  }
+
+  friend class DexFile;
+
+  const DexFile* const dex_file_;
+  const DexFile::ProtoId* const proto_id_;
+};
+std::ostream& operator<<(std::ostream& os, const Signature& sig);
+
 // Iterate and decode class_data_item
 class ClassDataItemIterator {
  public:
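
The Signature::operator== above deserves a worked reading: within one dex file it reduces to a proto_id pointer compare; across dex files the shorty is compared first, and full descriptor strings are fetched only for reference slots ('L' in the shorty). Hedged usage sketch:

    // Cross-dex equality without building any std::string (per operator== above).
    Signature a = dex_a.GetMethodSignature(dex_a.GetMethodId(idx_a));
    Signature b = dex_b.GetMethodSignature(dex_b.GetMethodId(idx_b));
    if (a == b) {
      // Shorties matched, and every reference slot had an identical descriptor.
    }
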
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 272c42d..7575d4a 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -137,14 +137,14 @@
   EXPECT_STREQ("LNested;", raw->GetClassDescriptor(c1));
 }
 
-TEST_F(DexFileTest, CreateMethodSignature) {
+TEST_F(DexFileTest, GetMethodSignature) {
   ScopedObjectAccess soa(Thread::Current());
-  const DexFile* raw(OpenTestDexFile("CreateMethodSignature"));
+  const DexFile* raw(OpenTestDexFile("GetMethodSignature"));
   ASSERT_TRUE(raw != NULL);
   EXPECT_EQ(1U, raw->NumClassDefs());
 
   const DexFile::ClassDef& class_def = raw->GetClassDef(0);
-  ASSERT_STREQ("LCreateMethodSignature;", raw->GetClassDescriptor(class_def));
+  ASSERT_STREQ("LGetMethodSignature;", raw->GetClassDescriptor(class_def));
 
   const byte* class_data = raw->GetClassData(class_def);
   ASSERT_TRUE(class_data != NULL);
@@ -156,11 +156,9 @@
   {
     ASSERT_EQ(1U, it.NumDirectMethods());
     const DexFile::MethodId& method_id = raw->GetMethodId(it.GetMemberIndex());
-    uint32_t proto_idx = method_id.proto_idx_;
     const char* name = raw->StringDataByIdx(method_id.name_idx_);
     ASSERT_STREQ("<init>", name);
-    int32_t length;
-    std::string signature(raw->CreateMethodSignature(proto_idx, &length));
+    std::string signature(raw->GetMethodSignature(method_id).ToString());
     ASSERT_EQ("()V", signature);
   }
 
@@ -173,9 +171,7 @@
     const char* name = raw->StringDataByIdx(method_id.name_idx_);
     ASSERT_STREQ("m1", name);
 
-    uint32_t proto_idx = method_id.proto_idx_;
-    int32_t length;
-    std::string signature(raw->CreateMethodSignature(proto_idx, &length));
+    std::string signature(raw->GetMethodSignature(method_id).ToString());
     ASSERT_EQ("(IDJLjava/lang/Object;)Ljava/lang/Float;", signature);
   }
 
@@ -186,20 +182,18 @@
     const char* name = raw->StringDataByIdx(method_id.name_idx_);
     ASSERT_STREQ("m2", name);
 
-    uint32_t proto_idx = method_id.proto_idx_;
-    int32_t length;
-    std::string signature(raw->CreateMethodSignature(proto_idx, &length));
-    ASSERT_EQ("(ZSC)LCreateMethodSignature;", signature);
+    std::string signature(raw->GetMethodSignature(method_id).ToString());
+    ASSERT_EQ("(ZSC)LGetMethodSignature;", signature);
   }
 }
 
 TEST_F(DexFileTest, FindStringId) {
   ScopedObjectAccess soa(Thread::Current());
-  const DexFile* raw(OpenTestDexFile("CreateMethodSignature"));
+  const DexFile* raw(OpenTestDexFile("GetMethodSignature"));
   ASSERT_TRUE(raw != NULL);
   EXPECT_EQ(1U, raw->NumClassDefs());
 
-  const char* strings[] = { "LCreateMethodSignature;", "Ljava/lang/Float;", "Ljava/lang/Object;",
+  const char* strings[] = { "LGetMethodSignature;", "Ljava/lang/Float;", "Ljava/lang/Object;",
       "D", "I", "J", NULL };
   for (size_t i = 0; strings[i] != NULL; i++) {
     const char* str = strings[i];
@@ -245,11 +239,10 @@
     const DexFile::StringId& name = java_lang_dex_file_->GetStringId(to_find.name_idx_);
     const DexFile::ProtoId& signature = java_lang_dex_file_->GetProtoId(to_find.proto_idx_);
     const DexFile::MethodId* found = java_lang_dex_file_->FindMethodId(klass, name, signature);
-    int32_t length;
     ASSERT_TRUE(found != NULL) << "Didn't find method " << i << ": "
         << java_lang_dex_file_->StringByTypeIdx(to_find.class_idx_) << "."
         << java_lang_dex_file_->GetStringData(name)
-        << java_lang_dex_file_->CreateMethodSignature(to_find.proto_idx_, &length);
+        << java_lang_dex_file_->GetMethodSignature(to_find);
     EXPECT_EQ(java_lang_dex_file_->GetIndexForMethodId(*found), i);
   }
 }
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 5b076e0..7dc2b31 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -21,7 +21,7 @@
 #include "leb128.h"
 #include "safe_map.h"
 #include "UniquePtr.h"
-#include "utf.h"
+#include "utf-inl.h"
 #include "utils.h"
 #include "zip_archive.h"
 
diff --git a/runtime/dex_instruction-inl.h b/runtime/dex_instruction-inl.h
index 6e21273..207b0b6 100644
--- a/runtime/dex_instruction-inl.h
+++ b/runtime/dex_instruction-inl.h
@@ -24,29 +24,29 @@
 //------------------------------------------------------------------------------
 // VRegA
 //------------------------------------------------------------------------------
-inline int8_t Instruction::VRegA_10t() const {
+inline int8_t Instruction::VRegA_10t(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k10t);
-  return static_cast<int8_t>(InstAA());
+  return static_cast<int8_t>(InstAA(inst_data));
 }
 
-inline uint8_t Instruction::VRegA_10x() const {
+inline uint8_t Instruction::VRegA_10x(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k10x);
-  return InstAA();
+  return InstAA(inst_data);
 }
 
-inline uint4_t Instruction::VRegA_11n() const {
+inline uint4_t Instruction::VRegA_11n(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k11n);
-  return InstA();
+  return InstA(inst_data);
 }
 
-inline uint8_t Instruction::VRegA_11x() const {
+inline uint8_t Instruction::VRegA_11x(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k11x);
-  return InstAA();
+  return InstAA(inst_data);
 }
 
-inline uint4_t Instruction::VRegA_12x() const {
+inline uint4_t Instruction::VRegA_12x(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k12x);
-  return InstA();
+  return InstA(inst_data);
 }
 
 inline int16_t Instruction::VRegA_20t() const {
@@ -54,54 +54,54 @@
   return static_cast<int16_t>(Fetch16(1));
 }
 
-inline uint8_t Instruction::VRegA_21c() const {
+inline uint8_t Instruction::VRegA_21c(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k21c);
-  return InstAA();
+  return InstAA(inst_data);
 }
 
-inline uint8_t Instruction::VRegA_21h() const {
+inline uint8_t Instruction::VRegA_21h(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k21h);
-  return InstAA();
+  return InstAA(inst_data);
 }
 
-inline uint8_t Instruction::VRegA_21s() const {
+inline uint8_t Instruction::VRegA_21s(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k21s);
-  return InstAA();
+  return InstAA(inst_data);
 }
 
-inline uint8_t Instruction::VRegA_21t() const {
+inline uint8_t Instruction::VRegA_21t(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k21t);
-  return InstAA();
+  return InstAA(inst_data);
 }
 
-inline uint8_t Instruction::VRegA_22b() const {
+inline uint8_t Instruction::VRegA_22b(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k22b);
-  return InstAA();
+  return InstAA(inst_data);
 }
 
-inline uint4_t Instruction::VRegA_22c() const {
+inline uint4_t Instruction::VRegA_22c(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k22c);
-  return InstA();
+  return InstA(inst_data);
 }
 
-inline uint4_t Instruction::VRegA_22s() const {
+inline uint4_t Instruction::VRegA_22s(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k22s);
-  return InstA();
+  return InstA(inst_data);
 }
 
-inline uint4_t Instruction::VRegA_22t() const {
+inline uint4_t Instruction::VRegA_22t(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k22t);
-  return InstA();
+  return InstA(inst_data);
 }
 
-inline uint8_t Instruction::VRegA_22x() const {
+inline uint8_t Instruction::VRegA_22x(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k22x);
-  return InstAA();
+  return InstAA(inst_data);
 }
 
-inline uint8_t Instruction::VRegA_23x() const {
+inline uint8_t Instruction::VRegA_23x(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k23x);
-  return InstAA();
+  return InstAA(inst_data);
 }
 
 inline int32_t Instruction::VRegA_30t() const {
@@ -109,19 +109,19 @@
   return static_cast<int32_t>(Fetch32(1));
 }
 
-inline uint8_t Instruction::VRegA_31c() const {
+inline uint8_t Instruction::VRegA_31c(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k31c);
-  return InstAA();
+  return InstAA(inst_data);
 }
 
-inline uint8_t Instruction::VRegA_31i() const {
+inline uint8_t Instruction::VRegA_31i(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k31i);
-  return InstAA();
+  return InstAA(inst_data);
 }
 
-inline uint8_t Instruction::VRegA_31t() const {
+inline uint8_t Instruction::VRegA_31t(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k31t);
-  return InstAA();
+  return InstAA(inst_data);
 }
 
 inline uint16_t Instruction::VRegA_32x() const {
@@ -129,32 +129,32 @@
   return Fetch16(1);
 }
 
-inline uint4_t Instruction::VRegA_35c() const {
+inline uint4_t Instruction::VRegA_35c(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k35c);
-  return InstB();  // This is labeled A in the spec.
+  return InstB(inst_data);  // This is labeled A in the spec.
 }
 
-inline uint8_t Instruction::VRegA_3rc() const {
+inline uint8_t Instruction::VRegA_3rc(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k3rc);
-  return InstAA();
+  return InstAA(inst_data);
 }
 
-inline uint8_t Instruction::VRegA_51l() const {
+inline uint8_t Instruction::VRegA_51l(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k51l);
-  return InstAA();
+  return InstAA(inst_data);
 }
 
 //------------------------------------------------------------------------------
 // VRegB
 //------------------------------------------------------------------------------
-inline int4_t Instruction::VRegB_11n() const {
+inline int4_t Instruction::VRegB_11n(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k11n);
-  return static_cast<int4_t>((InstB() << 28) >> 28);
+  return static_cast<int4_t>((InstB(inst_data) << 28) >> 28);
 }
 
-inline uint4_t Instruction::VRegB_12x() const {
+inline uint4_t Instruction::VRegB_12x(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k12x);
-  return InstB();
+  return InstB(inst_data);
 }
 
 inline uint16_t Instruction::VRegB_21c() const {
@@ -182,19 +182,19 @@
   return static_cast<uint8_t>(Fetch16(1) & 0xff);
 }
 
-inline uint4_t Instruction::VRegB_22c() const {
+inline uint4_t Instruction::VRegB_22c(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k22c);
-  return InstB();
+  return InstB(inst_data);
 }
 
-inline uint4_t Instruction::VRegB_22s() const {
+inline uint4_t Instruction::VRegB_22s(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k22s);
-  return InstB();
+  return InstB(inst_data);
 }
 
-inline uint4_t Instruction::VRegB_22t() const {
+inline uint4_t Instruction::VRegB_22t(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k22t);
-  return InstB();
+  return InstB(inst_data);
 }
 
 inline uint16_t Instruction::VRegB_22x() const {
@@ -281,7 +281,7 @@
   return Fetch16(2);
 }
 
-inline void Instruction::GetArgs(uint32_t arg[5]) const {
+inline void Instruction::GetArgs(uint32_t arg[5], uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k35c);
 
   /*
@@ -295,7 +295,8 @@
    * method constant (or equivalent) is always in vB.
    */
   uint16_t regList = Fetch16(2);
-  uint4_t count = InstB();  // This is labeled A in the spec.
+  uint4_t count = InstB(inst_data);  // This is labeled A in the spec.
+  DCHECK_LE(count, 5U) << "Invalid arg count in 35c (" << count << ")";
 
   /*
    * Copy the argument registers into the arg[] array, and
@@ -305,15 +306,13 @@
    * copies of those.) Note that cases 5..2 fall through.
    */
   switch (count) {
-    case 5: arg[4] = InstA();
+    case 5: arg[4] = InstA(inst_data);
     case 4: arg[3] = (regList >> 12) & 0x0f;
     case 3: arg[2] = (regList >> 8) & 0x0f;
     case 2: arg[1] = (regList >> 4) & 0x0f;
     case 1: arg[0] = regList & 0x0f; break;
-    case 0: break;  // Valid, but no need to do anything.
-    default:
-      LOG(ERROR) << "Invalid arg count in 35c (" << count << ")";
-      return;
+    default:  // case 0
+      break;  // Valid, but no need to do anything.
   }
 }
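
The point of the inst_data plumbing in this file: a caller can read the first 16-bit code unit once and feed it to every decoder, instead of each accessor re-reading memory through Fetch16(0); the DCHECKs keep the cached value and the instruction stream in agreement. Usage sketch (inst and shadow_frame are assumed locals):

    const uint16_t inst_data = inst->Fetch16(0);  // single read of the first code unit
    switch (inst->Opcode(inst_data)) {
      case Instruction::MOVE:
        // Both operands decode from the already-fetched inst_data.
        shadow_frame.SetVReg(inst->VRegA_12x(inst_data),
                             shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
        break;
      default:
        break;
    }
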
 
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 13b0f1c..c434cdd 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -217,44 +217,122 @@
   // VRegA
   bool HasVRegA() const;
   int32_t VRegA() const;
-  int8_t VRegA_10t() const;
-  uint8_t VRegA_10x() const;
-  uint4_t VRegA_11n() const;
-  uint8_t VRegA_11x() const;
-  uint4_t VRegA_12x() const;
+
+  int8_t VRegA_10t() const {
+    return VRegA_10t(Fetch16(0));
+  }
+  uint8_t VRegA_10x() const {
+    return VRegA_10x(Fetch16(0));
+  }
+  uint4_t VRegA_11n() const {
+    return VRegA_11n(Fetch16(0));
+  }
+  uint8_t VRegA_11x() const {
+    return VRegA_11x(Fetch16(0));
+  }
+  uint4_t VRegA_12x() const {
+    return VRegA_12x(Fetch16(0));
+  }
   int16_t VRegA_20t() const;
-  uint8_t VRegA_21c() const;
-  uint8_t VRegA_21h() const;
-  uint8_t VRegA_21s() const;
-  uint8_t VRegA_21t() const;
-  uint8_t VRegA_22b() const;
-  uint4_t VRegA_22c() const;
-  uint4_t VRegA_22s() const;
-  uint4_t VRegA_22t() const;
-  uint8_t VRegA_22x() const;
-  uint8_t VRegA_23x() const;
+  uint8_t VRegA_21c() const {
+    return VRegA_21c(Fetch16(0));
+  }
+  uint8_t VRegA_21h() const {
+    return VRegA_21h(Fetch16(0));
+  }
+  uint8_t VRegA_21s() const {
+    return VRegA_21s(Fetch16(0));
+  }
+  uint8_t VRegA_21t() const {
+    return VRegA_21t(Fetch16(0));
+  }
+  uint8_t VRegA_22b() const {
+    return VRegA_22b(Fetch16(0));
+  }
+  uint4_t VRegA_22c() const {
+    return VRegA_22c(Fetch16(0));
+  }
+  uint4_t VRegA_22s() const {
+    return VRegA_22s(Fetch16(0));
+  }
+  uint4_t VRegA_22t() const {
+    return VRegA_22t(Fetch16(0));
+  }
+  uint8_t VRegA_22x() const {
+    return VRegA_22x(Fetch16(0));
+  }
+  uint8_t VRegA_23x() const {
+    return VRegA_23x(Fetch16(0));
+  }
   int32_t VRegA_30t() const;
-  uint8_t VRegA_31c() const;
-  uint8_t VRegA_31i() const;
-  uint8_t VRegA_31t() const;
+  uint8_t VRegA_31c() const {
+    return VRegA_31c(Fetch16(0));
+  }
+  uint8_t VRegA_31i() const {
+    return VRegA_31i(Fetch16(0));
+  }
+  uint8_t VRegA_31t() const {
+    return VRegA_31t(Fetch16(0));
+  }
   uint16_t VRegA_32x() const;
-  uint4_t VRegA_35c() const;
-  uint8_t VRegA_3rc() const;
-  uint8_t VRegA_51l() const;
+  uint4_t VRegA_35c() const {
+    return VRegA_35c(Fetch16(0));
+  }
+  uint8_t VRegA_3rc() const {
+    return VRegA_3rc(Fetch16(0));
+  }
+  uint8_t VRegA_51l() const {
+    return VRegA_51l(Fetch16(0));
+  }
+
+  // The following methods return the vA operand for various instruction formats. The "inst_data"
+  // parameter holds the first 16 bits of the instruction, from which the returned value is decoded.
+  int8_t VRegA_10t(uint16_t inst_data) const;
+  uint8_t VRegA_10x(uint16_t inst_data) const;
+  uint4_t VRegA_11n(uint16_t inst_data) const;
+  uint8_t VRegA_11x(uint16_t inst_data) const;
+  uint4_t VRegA_12x(uint16_t inst_data) const;
+  uint8_t VRegA_21c(uint16_t inst_data) const;
+  uint8_t VRegA_21h(uint16_t inst_data) const;
+  uint8_t VRegA_21s(uint16_t inst_data) const;
+  uint8_t VRegA_21t(uint16_t inst_data) const;
+  uint8_t VRegA_22b(uint16_t inst_data) const;
+  uint4_t VRegA_22c(uint16_t inst_data) const;
+  uint4_t VRegA_22s(uint16_t inst_data) const;
+  uint4_t VRegA_22t(uint16_t inst_data) const;
+  uint8_t VRegA_22x(uint16_t inst_data) const;
+  uint8_t VRegA_23x(uint16_t inst_data) const;
+  uint8_t VRegA_31c(uint16_t inst_data) const;
+  uint8_t VRegA_31i(uint16_t inst_data) const;
+  uint8_t VRegA_31t(uint16_t inst_data) const;
+  uint4_t VRegA_35c(uint16_t inst_data) const;
+  uint8_t VRegA_3rc(uint16_t inst_data) const;
+  uint8_t VRegA_51l(uint16_t inst_data) const;
 
   // VRegB
   bool HasVRegB() const;
   int32_t VRegB() const;
-  int4_t VRegB_11n() const;
-  uint4_t VRegB_12x() const;
+
+  int4_t VRegB_11n() const {
+    return VRegB_11n(Fetch16(0));
+  }
+  uint4_t VRegB_12x() const {
+    return VRegB_12x(Fetch16(0));
+  }
   uint16_t VRegB_21c() const;
   uint16_t VRegB_21h() const;
   int16_t VRegB_21s() const;
   int16_t VRegB_21t() const;
   uint8_t VRegB_22b() const;
-  uint4_t VRegB_22c() const;
-  uint4_t VRegB_22s() const;
-  uint4_t VRegB_22t() const;
+  uint4_t VRegB_22c() const {
+    return VRegB_22c(Fetch16(0));
+  }
+  uint4_t VRegB_22s() const {
+    return VRegB_22s(Fetch16(0));
+  }
+  uint4_t VRegB_22t() const {
+    return VRegB_22t(Fetch16(0));
+  }
   uint16_t VRegB_22x() const;
   uint8_t VRegB_23x() const;
   uint32_t VRegB_31c() const;
@@ -265,9 +343,19 @@
   uint16_t VRegB_3rc() const;
   uint64_t VRegB_51l() const;  // vB_wide
 
+  // The following methods return the vB operand for all instruction formats where it is encoded in
+  // the first 16 bits of the instruction. The "inst_data" parameter holds these 16 bits, and the
+  // returned value is decoded from them.
+  int4_t VRegB_11n(uint16_t inst_data) const;
+  uint4_t VRegB_12x(uint16_t inst_data) const;
+  uint4_t VRegB_22c(uint16_t inst_data) const;
+  uint4_t VRegB_22s(uint16_t inst_data) const;
+  uint4_t VRegB_22t(uint16_t inst_data) const;
+
   // VRegC
   bool HasVRegC() const;
   int32_t VRegC() const;
+
   int8_t VRegC_22b() const;
   uint16_t VRegC_22c() const;
   int16_t VRegC_22s() const;
@@ -277,11 +365,21 @@
   uint16_t VRegC_3rc() const;
 
   // Fills the given array with the 'arg' array of the instruction.
-  void GetArgs(uint32_t args[5]) const;
+  void GetArgs(uint32_t args[5], uint16_t inst_data) const;
+  void GetArgs(uint32_t args[5]) const {
+    return GetArgs(args, Fetch16(0));
+  }
 
-  // Returns the opcode field of the instruction.
+  // Returns the opcode field of the instruction. The given "inst_data" parameter must be the first
+  // 16 bits of the instruction.
+  Code Opcode(uint16_t inst_data) const {
+    DCHECK_EQ(inst_data, Fetch16(0));
+    return static_cast<Code>(inst_data & 0xFF);
+  }
+
+  // Returns the opcode field of the instruction, decoded from its first 16 bits.
   Code Opcode() const {
-    return static_cast<Code>(Fetch16(0) & 0xFF);
+    return Opcode(Fetch16(0));
   }
 
   void SetOpcode(Code opcode) {
@@ -395,28 +493,43 @@
   // Dump code_units worth of this instruction, padding to code_units for shorter instructions
   std::string DumpHex(size_t code_units) const;
 
- private:
-  size_t SizeInCodeUnitsComplexOpcode() const;
-
   uint16_t Fetch16(size_t offset) const {
     const uint16_t* insns = reinterpret_cast<const uint16_t*>(this);
     return insns[offset];
   }
 
+ private:
+  size_t SizeInCodeUnitsComplexOpcode() const;
+
   uint32_t Fetch32(size_t offset) const {
     return (Fetch16(offset) | ((uint32_t) Fetch16(offset + 1) << 16));
   }
 
   uint4_t InstA() const {
-    return static_cast<uint4_t>((Fetch16(0) >> 8) & 0x0f);
+    return InstA(Fetch16(0));
   }
 
   uint4_t InstB() const {
-    return static_cast<uint4_t>(Fetch16(0) >> 12);
+    return InstB(Fetch16(0));
   }
 
   uint8_t InstAA() const {
-    return static_cast<uint8_t>(Fetch16(0) >> 8);
+    return InstAA(Fetch16(0));
+  }
+
+  uint4_t InstA(uint16_t inst_data) const {
+    DCHECK_EQ(inst_data, Fetch16(0));
+    return static_cast<uint4_t>((inst_data >> 8) & 0x0f);
+  }
+
+  uint4_t InstB(uint16_t inst_data) const {
+    DCHECK_EQ(inst_data, Fetch16(0));
+    return static_cast<uint4_t>(inst_data >> 12);
+  }
+
+  uint8_t InstAA(uint16_t inst_data) const {
+    DCHECK_EQ(inst_data, Fetch16(0));
+    return static_cast<uint8_t>(inst_data >> 8);
   }
 
   static const char* const kInstructionNames[];
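
A worked decode of the InstA/InstB/InstAA accessors above, for inst_data == 0x2B01 (the low byte selects the opcode; the high byte carries vA/vB or vAA):

    // Opcode(0x2B01) ==  0x2B01 & 0xFF        == 0x01  (MOVE, format 12x)
    // InstA(0x2B01)  == (0x2B01 >> 8) & 0x0f  == 0xB   (vA, low nibble of high byte)
    // InstB(0x2B01)  ==  0x2B01 >> 12         == 0x2   (vB, high nibble)
    // InstAA(0x2B01) == (0x2B01 >> 8) & 0xff  == 0x2B  (vAA, the whole high byte)
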
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 52f8c81..d9c9e31 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -33,20 +33,20 @@
 
 namespace art {
 
-// Helper function to allocate array for FILLED_NEW_ARRAY.
-mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* referrer,
-                                          int32_t component_count, Thread* self,
-                                          bool access_check) {
+static inline bool CheckFilledNewArrayAlloc(uint32_t type_idx, mirror::ArtMethod* referrer,
+                                            int32_t component_count, Thread* self,
+                                            bool access_check, mirror::Class** klass_ptr)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (UNLIKELY(component_count < 0)) {
     ThrowNegativeArraySizeException(component_count);
-    return NULL;  // Failure
+    return false;  // Failure
   }
-  mirror::Class* klass = referrer->GetDexCacheResolvedTypes()->Get(type_idx);
+  mirror::Class* klass = referrer->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
   if (UNLIKELY(klass == NULL)) {  // Not in dex cache so try to resolve
     klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, referrer);
     if (klass == NULL) {  // Error
       DCHECK(self->IsExceptionPending());
-      return NULL;  // Failure
+      return false;  // Failure
     }
   }
   if (UNLIKELY(klass->IsPrimitive() && !klass->IsPrimitiveInt())) {
@@ -60,18 +60,40 @@
                                "Found type %s; filled-new-array not implemented for anything but \'int\'",
                                PrettyDescriptor(klass).c_str());
     }
-    return NULL;  // Failure
-  } else {
-    if (access_check) {
-      mirror::Class* referrer_klass = referrer->GetDeclaringClass();
-      if (UNLIKELY(!referrer_klass->CanAccess(klass))) {
-        ThrowIllegalAccessErrorClass(referrer_klass, klass);
-        return NULL;  // Failure
-      }
-    }
-    DCHECK(klass->IsArrayClass()) << PrettyClass(klass);
-    return mirror::Array::Alloc(self, klass, component_count);
+    return false;  // Failure
   }
+  if (access_check) {
+    mirror::Class* referrer_klass = referrer->GetDeclaringClass();
+    if (UNLIKELY(!referrer_klass->CanAccess(klass))) {
+      ThrowIllegalAccessErrorClass(referrer_klass, klass);
+      return false;  // Failure
+    }
+  }
+  DCHECK(klass->IsArrayClass()) << PrettyClass(klass);
+  *klass_ptr = klass;
+  return true;
+}
+
+// Helper function to allocate array for FILLED_NEW_ARRAY.
+mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* referrer,
+                                          int32_t component_count, Thread* self,
+                                          bool access_check) {
+  mirror::Class* klass;
+  if (UNLIKELY(!CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self, access_check, &klass))) {
+    return NULL;
+  }
+  return mirror::Array::AllocUninstrumented(self, klass, component_count);
+}
+
+// Helper function to allocate array for FILLED_NEW_ARRAY.
+mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* referrer,
+                                                      int32_t component_count, Thread* self,
+                                                      bool access_check) {
+  mirror::Class* klass;
+  if (UNLIKELY(!CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self, access_check, &klass))) {
+    return NULL;
+  }
+  return mirror::Array::AllocInstrumented(self, klass, component_count);
 }
 
 mirror::ArtField* FindFieldFromCode(uint32_t field_idx, const mirror::ArtMethod* referrer,
@@ -405,5 +427,4 @@
     return zero;
   }
 }
-
 }  // namespace art
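
The Instrumented/Uninstrumented split above pairs with the debugger.cc hunk earlier: toggling allocation tracking swaps the quick allocation entry points, so the instrumented variants only run while tracking is enabled. Sketch of the toggle (exact plumbing lives in Runtime):

    if (enable_alloc_tracking) {  // hypothetical flag
      Runtime::Current()->InstrumentQuickAllocEntryPoints();    // route to the *Instrumented variants
    } else {
      Runtime::Current()->UninstrumentQuickAllocEntryPoints();  // restore the fast paths
    }
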
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 8b58cb3..c32a661 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -16,7 +16,8 @@
 
 #ifndef ART_RUNTIME_ENTRYPOINTS_ENTRYPOINT_UTILS_H_
 #define ART_RUNTIME_ENTRYPOINTS_ENTRYPOINT_UTILS_H_
-#include "object_utils.h"
+
+#include "base/macros.h"
 #include "class_linker.h"
 #include "common_throws.h"
 #include "dex_file.h"
@@ -27,6 +28,7 @@
 #include "mirror/array.h"
 #include "mirror/class-inl.h"
 #include "mirror/throwable.h"
+#include "object_utils.h"
 
 #include "thread.h"
 
@@ -38,6 +40,42 @@
   class Object;
 }  // namespace mirror
 
+static inline bool CheckObjectAlloc(uint32_t type_idx, mirror::ArtMethod* method,
+                                    Thread* self,
+                                    bool access_check,
+                                    mirror::Class** klass_ptr)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
+  Runtime* runtime = Runtime::Current();
+  if (UNLIKELY(klass == NULL)) {
+    klass = runtime->GetClassLinker()->ResolveType(type_idx, method);
+    if (klass == NULL) {
+      DCHECK(self->IsExceptionPending());
+      return false;  // Failure
+    }
+  }
+  if (access_check) {
+    if (UNLIKELY(!klass->IsInstantiable())) {
+      ThrowLocation throw_location = self->GetCurrentLocationForThrow();
+      self->ThrowNewException(throw_location, "Ljava/lang/InstantiationError;",
+                              PrettyDescriptor(klass).c_str());
+      return false;  // Failure
+    }
+    mirror::Class* referrer = method->GetDeclaringClass();
+    if (UNLIKELY(!referrer->CanAccess(klass))) {
+      ThrowIllegalAccessErrorClass(referrer, klass);
+      return false;  // Failure
+    }
+  }
+  if (!klass->IsInitialized() &&
+      !runtime->GetClassLinker()->EnsureInitialized(klass, true, true)) {
+    DCHECK(self->IsExceptionPending());
+    return false;  // Failure
+  }
+  *klass_ptr = klass;
+  return true;
+}
+
 // Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it
 // cannot be resolved, throw an error. If it can, use it to create an instance.
 // When verification/compiler hasn't been able to verify access, optionally perform an access
@@ -46,34 +84,50 @@
                                                   Thread* self,
                                                   bool access_check)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::Class* klass = method->GetDexCacheResolvedTypes()->Get(type_idx);
-  Runtime* runtime = Runtime::Current();
-  if (UNLIKELY(klass == NULL)) {
-    klass = runtime->GetClassLinker()->ResolveType(type_idx, method);
-    if (klass == NULL) {
-      DCHECK(self->IsExceptionPending());
-      return NULL;  // Failure
+  mirror::Class* klass;
+  if (UNLIKELY(!CheckObjectAlloc(type_idx, method, self, access_check, &klass))) {
+    return NULL;
+  }
+  return klass->AllocObjectUninstrumented(self);
+}
+
+static inline mirror::Object* AllocObjectFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
+                                                              Thread* self,
+                                                              bool access_check)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  mirror::Class* klass;
+  if (UNLIKELY(!CheckObjectAlloc(type_idx, method, self, access_check, &klass))) {
+    return NULL;
+  }
+  return klass->AllocObjectInstrumented(self);
+}
+
+static inline bool CheckArrayAlloc(uint32_t type_idx, mirror::ArtMethod* method,
+                                   int32_t component_count,
+                                   bool access_check, mirror::Class** klass_ptr)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if (UNLIKELY(component_count < 0)) {
+    ThrowNegativeArraySizeException(component_count);
+    return false;  // Failure
+  }
+  mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
+  if (UNLIKELY(klass == NULL)) {  // Not in dex cache so try to resolve
+    klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
+    if (klass == NULL) {  // Error
+      DCHECK(Thread::Current()->IsExceptionPending());
+      return false;  // Failure
     }
+    CHECK(klass->IsArrayClass()) << PrettyClass(klass);
   }
   if (access_check) {
-    if (UNLIKELY(!klass->IsInstantiable())) {
-      ThrowLocation throw_location = self->GetCurrentLocationForThrow();
-      self->ThrowNewException(throw_location, "Ljava/lang/InstantiationError;",
-                              PrettyDescriptor(klass).c_str());
-      return NULL;  // Failure
-    }
     mirror::Class* referrer = method->GetDeclaringClass();
     if (UNLIKELY(!referrer->CanAccess(klass))) {
       ThrowIllegalAccessErrorClass(referrer, klass);
-      return NULL;  // Failure
+      return false;  // Failure
     }
   }
-  if (!klass->IsInitialized() &&
-      !runtime->GetClassLinker()->EnsureInitialized(klass, true, true)) {
-    DCHECK(self->IsExceptionPending());
-    return NULL;  // Failure
-  }
-  return klass->AllocObject(self);
+  *klass_ptr = klass;
+  return true;
 }
 
 // Given the context of a calling Method, use its DexCache to resolve a type to an array Class. If
@@ -84,27 +138,22 @@
                                                 int32_t component_count,
                                                 Thread* self, bool access_check)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (UNLIKELY(component_count < 0)) {
-    ThrowNegativeArraySizeException(component_count);
-    return NULL;  // Failure
+  mirror::Class* klass;
+  if (UNLIKELY(!CheckArrayAlloc(type_idx, method, component_count, access_check, &klass))) {
+    return NULL;
   }
-  mirror::Class* klass = method->GetDexCacheResolvedTypes()->Get(type_idx);
-  if (UNLIKELY(klass == NULL)) {  // Not in dex cache so try to resolve
-    klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
-    if (klass == NULL) {  // Error
-      DCHECK(Thread::Current()->IsExceptionPending());
-      return NULL;  // Failure
-    }
-    CHECK(klass->IsArrayClass()) << PrettyClass(klass);
+  return mirror::Array::AllocUninstrumented(self, klass, component_count);
+}
+
+static inline mirror::Array* AllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
+                                                            int32_t component_count,
+                                                            Thread* self, bool access_check)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  mirror::Class* klass;
+  if (UNLIKELY(!CheckArrayAlloc(type_idx, method, component_count, access_check, &klass))) {
+    return NULL;
   }
-  if (access_check) {
-    mirror::Class* referrer = method->GetDeclaringClass();
-    if (UNLIKELY(!referrer->CanAccess(klass))) {
-      ThrowIllegalAccessErrorClass(referrer, klass);
-      return NULL;  // Failure
-    }
-  }
-  return mirror::Array::Alloc(self, klass, component_count);
+  return mirror::Array::AllocInstrumented(self, klass, component_count);
 }
 
 extern mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* method,
@@ -112,6 +161,11 @@
                                                  Thread* self, bool access_check)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+extern mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
+                                                             int32_t component_count,
+                                                             Thread* self, bool access_check)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
 // Type of find field operation for fast and slow case.
 enum FindFieldType {
   InstanceObjectRead,
@@ -258,7 +312,7 @@
   //
   // Do not set the DexCache InitializedStaticStorage, since that implies <clinit> has finished
   // running.
-  if (klass == referring_class && MethodHelper(referrer).IsClassInitializer()) {
+  if (klass == referring_class && referrer->IsConstructor() && referrer->IsStatic()) {
     return klass;
   }
   if (!class_linker->EnsureInitialized(klass, true, true)) {
@@ -324,7 +378,6 @@
   for (;;) {
     if (thread->ReadFlag(kCheckpointRequest)) {
       thread->RunCheckpointFunction();
-      thread->AtomicClearFlag(kCheckpointRequest);
     } else if (thread->ReadFlag(kSuspendRequest)) {
       thread->FullSuspendCheck();
     } else {
@@ -416,6 +469,23 @@
   return reinterpret_cast<void*>(art_jni_dlsym_lookup_stub);
 }
 
+template <typename INT_TYPE, typename FLOAT_TYPE>
+static inline INT_TYPE art_float_to_integral(FLOAT_TYPE f) {
+  const INT_TYPE kMaxInt = static_cast<INT_TYPE>(std::numeric_limits<INT_TYPE>::max());
+  const INT_TYPE kMinInt = static_cast<INT_TYPE>(std::numeric_limits<INT_TYPE>::min());
+  const FLOAT_TYPE kMaxIntAsFloat = static_cast<FLOAT_TYPE>(kMaxInt);
+  const FLOAT_TYPE kMinIntAsFloat = static_cast<FLOAT_TYPE>(kMinInt);
+  if (LIKELY(f > kMinIntAsFloat)) {
+    if (LIKELY(f < kMaxIntAsFloat)) {
+      return static_cast<INT_TYPE>(f);
+    } else {
+      return kMaxInt;
+    }
+  } else {
+    return (f != f) ? 0 : kMinInt;  // f != f implies NaN
+  }
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_ENTRYPOINT_UTILS_H_
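
art_float_to_integral above centralizes the Java float-to-int conversion rules that math_entrypoints.cc (below) previously hand-rolled four times: NaN maps to 0 and out-of-range values clamp to the integral min/max. The strict < against kMaxIntAsFloat matters because the max integer can round up when converted to the float type. Worked values, assuming the int32_t/double instantiation:

    art_float_to_integral<int32_t, double>(12.9);   // -> 12 (truncates toward zero)
    art_float_to_integral<int32_t, double>(1e18);   // -> 2147483647 (clamped to max)
    art_float_to_integral<int32_t, double>(-1e18);  // -> -2147483648 (clamped to min)
    double nan = std::numeric_limits<double>::quiet_NaN();
    art_float_to_integral<int32_t, double>(nan);    // -> 0 (NaN case)
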
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
index ecf98bc..05c02f2 100644
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
+++ b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
@@ -31,7 +31,15 @@
   mirror::ArtMethod* method = shadow_frame->GetMethod();
   // Ensure static methods are initialized.
   if (method->IsStatic()) {
-    Runtime::Current()->GetClassLinker()->EnsureInitialized(method->GetDeclaringClass(), true, true);
+    mirror::Class* declaringClass = method->GetDeclaringClass();
+    if (UNLIKELY(!declaringClass->IsInitializing())) {
+      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(declaringClass,
+                                                                            true, true))) {
+        DCHECK(Thread::Current()->IsExceptionPending());
+        return;
+      }
+      CHECK(declaringClass->IsInitializing());
+    }
   }
   uint16_t arg_offset = (code_item == NULL) ? 0 : code_item->registers_size_ - code_item->ins_size_;
 #if defined(ART_USE_PORTABLE_COMPILER)
@@ -40,7 +48,7 @@
   method->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result, mh.GetShorty()[0]);
 #else
   method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
-                 (shadow_frame->NumberOfVRegs() - arg_offset) * 4,
+                 (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
                  result, mh.GetShorty()[0]);
 #endif
 }
diff --git a/runtime/entrypoints/math_entrypoints.cc b/runtime/entrypoints/math_entrypoints.cc
index 31d13c8..b839b63 100644
--- a/runtime/entrypoints/math_entrypoints.cc
+++ b/runtime/entrypoints/math_entrypoints.cc
@@ -16,6 +16,8 @@
 
 #include "math_entrypoints.h"
 
+#include "entrypoint_utils.h"
+
 namespace art {
 
 extern "C" double art_l2d(int64_t l) {
@@ -31,59 +33,19 @@
  * target doesn't support this normally, use these.
  */
 extern "C" int64_t art_d2l(double d) {
-  static const double kMaxLong = static_cast<double>(static_cast<int64_t>(0x7fffffffffffffffULL));
-  static const double kMinLong = static_cast<double>(static_cast<int64_t>(0x8000000000000000ULL));
-  if (d >= kMaxLong) {
-    return static_cast<int64_t>(0x7fffffffffffffffULL);
-  } else if (d <= kMinLong) {
-    return static_cast<int64_t>(0x8000000000000000ULL);
-  } else if (d != d)  {  // NaN case
-    return 0;
-  } else {
-    return static_cast<int64_t>(d);
-  }
+  return art_float_to_integral<int64_t, double>(d);
 }
 
 extern "C" int64_t art_f2l(float f) {
-  static const float kMaxLong = static_cast<float>(static_cast<int64_t>(0x7fffffffffffffffULL));
-  static const float kMinLong = static_cast<float>(static_cast<int64_t>(0x8000000000000000ULL));
-  if (f >= kMaxLong) {
-    return static_cast<int64_t>(0x7fffffffffffffffULL);
-  } else if (f <= kMinLong) {
-    return static_cast<int64_t>(0x8000000000000000ULL);
-  } else if (f != f) {  // NaN case
-    return 0;
-  } else {
-    return static_cast<int64_t>(f);
-  }
+  return art_float_to_integral<int64_t, float>(f);
 }
 
 extern "C" int32_t art_d2i(double d) {
-  static const double kMaxInt = static_cast<double>(static_cast<int32_t>(0x7fffffffUL));
-  static const double kMinInt = static_cast<double>(static_cast<int32_t>(0x80000000UL));
-  if (d >= kMaxInt) {
-    return static_cast<int32_t>(0x7fffffffUL);
-  } else if (d <= kMinInt) {
-    return static_cast<int32_t>(0x80000000UL);
-  } else if (d != d)  {  // NaN case
-    return 0;
-  } else {
-    return static_cast<int32_t>(d);
-  }
+  return art_float_to_integral<int32_t, double>(d);
 }
 
 extern "C" int32_t art_f2i(float f) {
-  static const float kMaxInt = static_cast<float>(static_cast<int32_t>(0x7fffffffUL));
-  static const float kMinInt = static_cast<float>(static_cast<int32_t>(0x80000000UL));
-  if (f >= kMaxInt) {
-    return static_cast<int32_t>(0x7fffffffUL);
-  } else if (f <= kMinInt) {
-    return static_cast<int32_t>(0x80000000UL);
-  } else if (f != f) {  // NaN case
-    return 0;
-  } else {
-    return static_cast<int32_t>(f);
-  }
+  return art_float_to_integral<int32_t, float>(f);
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 420e63a..6f7b1ab 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -76,4 +76,57 @@
   return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, true);
 }
 
+extern "C" mirror::Object* artAllocObjectFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
+                                                              Thread* self, mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  return AllocObjectFromCodeInstrumented(type_idx, method, self, false);
+}
+
+extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheckInstrumented(uint32_t type_idx,
+                                                                             mirror::ArtMethod* method,
+                                                                             Thread* self,
+                                                                             mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  return AllocObjectFromCodeInstrumented(type_idx, method, self, true);
+}
+
+extern "C" mirror::Array* artAllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
+                                                            int32_t component_count, Thread* self,
+                                                            mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  return AllocArrayFromCodeInstrumented(type_idx, method, component_count, self, false);
+}
+
+extern "C" mirror::Array* artAllocArrayFromCodeWithAccessCheckInstrumented(uint32_t type_idx,
+                                                                           mirror::ArtMethod* method,
+                                                                           int32_t component_count,
+                                                                           Thread* self,
+                                                                           mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  return AllocArrayFromCodeInstrumented(type_idx, method, component_count, self, true);
+}
+
+extern "C" mirror::Array* artCheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx,
+                                                                    mirror::ArtMethod* method,
+                                                                    int32_t component_count, Thread* self,
+                                                                    mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  return CheckAndAllocArrayFromCodeInstrumented(type_idx, method, component_count, self, false);
+}
+
+extern "C" mirror::Array* artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented(uint32_t type_idx,
+                                                                                   mirror::ArtMethod* method,
+                                                                                   int32_t component_count,
+                                                                                   Thread* self,
+                                                                                   mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  return CheckAndAllocArrayFromCodeInstrumented(type_idx, method, component_count, self, true);
+}
+
 }  // namespace art
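
Editorial note: all six instrumented shims added above share one shape: finish the callee-save frame so the GC can walk the caller, then delegate to the matching *Instrumented helper with the access_check flag baked in. A hypothetical condensation of the two object shims (not the form this file uses; the tree spells each one out so the symbols stay greppable from the assembly stubs):

#define DEFINE_INSTRUMENTED_ALLOC_SHIM(name, helper, access_check)       \
  extern "C" mirror::Object* name(uint32_t type_idx,                     \
                                  mirror::ArtMethod* method,             \
                                  Thread* self, mirror::ArtMethod** sp)  \
      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {                      \
    FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);            \
    return helper(type_idx, method, self, access_check);                 \
  }

DEFINE_INSTRUMENTED_ALLOC_SHIM(artAllocObjectFromCodeInstrumented,
                               AllocObjectFromCodeInstrumented, false)
DEFINE_INSTRUMENTED_ALLOC_SHIM(artAllocObjectFromCodeWithAccessCheckInstrumented,
                               AllocObjectFromCodeInstrumented, true)

The array shims would need an extra component_count parameter, but the frame-setup-then-delegate pattern is identical.
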
diff --git a/runtime/entrypoints/quick/quick_cast_entrypoints.cc b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
index 9ffa736..ae53d6c 100644
--- a/runtime/entrypoints/quick/quick_cast_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
@@ -14,11 +14,8 @@
  * limitations under the License.
  */
 
-#include "callee_save_frame.h"
-#include "entrypoints/entrypoint_utils.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
-#include "mirror/object_array-inl.h"
 
 namespace art {
 
@@ -31,38 +28,4 @@
   return klass->IsAssignableFrom(ref_class) ? 1 : 0;
 }
 
-// Check whether it is safe to cast one class to the other, throw exception and return -1 on failure
-extern "C" int artCheckCastFromCode(mirror::Class* src_type, mirror::Class* dest_type,
-                                    Thread* self, mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(src_type->IsClass()) << PrettyClass(src_type);
-  DCHECK(dest_type->IsClass()) << PrettyClass(dest_type);
-  if (LIKELY(dest_type->IsAssignableFrom(src_type))) {
-    return 0;  // Success
-  } else {
-    FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-    ThrowClassCastException(dest_type, src_type);
-    return -1;  // Failure
-  }
-}
-
-// Tests whether 'element' can be assigned into an array of type 'array_class'.
-// Returns 0 on success and -1 if an exception is pending.
-extern "C" int artCanPutArrayElementFromCode(const mirror::Object* element,
-                                             const mirror::Class* array_class,
-                                             Thread* self, mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(array_class != NULL);
-  // element can't be NULL as we catch this is screened in runtime_support
-  mirror::Class* element_class = element->GetClass();
-  mirror::Class* component_type = array_class->GetComponentType();
-  if (LIKELY(component_type->IsAssignableFrom(element_class))) {
-    return 0;  // Success
-  } else {
-    FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-    ThrowArrayStoreException(element_class, array_class);
-    return -1;  // Failure
-  }
-}
-
 }  // namespace art
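
Editorial note: with artCheckCastFromCode and artCanPutArrayElementFromCode deleted, only the fast assignability test remains in this file; the failure paths resurface as artThrowClassCastException and artThrowArrayStoreException in quick_throw_entrypoints.cc below. Compiled code thus branches straight to a throw stub instead of testing a return code. Hedged pseudo-code for the check-cast sequence this implies (illustrative names; the real sequence is emitted assembly):

// Sketch only: on the slow path there is no status code left to test.
if (obj != nullptr) {
  mirror::Class* src_type = obj->GetClass();
  if (UNLIKELY(!dest_type->IsAssignableFrom(src_type))) {
    // The stub sets up a kSaveAll frame, throws, and unwinds via
    // QuickDeliverException(); control never returns here.
    artThrowClassCastException(dest_type, src_type, self, sp);
  }
}
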
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 9d3b8ef..e9964ad 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -48,7 +48,6 @@
 
   // Cast
   uint32_t (*pInstanceofNonTrivial)(const mirror::Class*, const mirror::Class*);
-  void (*pCanPutArrayElement)(void*, void*);
   void (*pCheckCast)(void*, void*);
 
   // DexCache
@@ -71,7 +70,10 @@
   void* (*pGetObjInstance)(uint32_t, void*);
   void* (*pGetObjStatic)(uint32_t);
 
-  // FillArray
+  // Array
+  void (*pAputObjectWithNullAndBoundCheck)(void*, uint32_t, void*);  // array, index, src
+  void (*pAputObjectWithBoundCheck)(void*, uint32_t, void*);  // array, index, src
+  void (*pAputObject)(void*, uint32_t, void*);  // array, index, src
   void (*pHandleFillArrayData)(void*, void*);
 
   // JNI
@@ -103,7 +105,7 @@
   int64_t (*pD2l)(double);
   int64_t (*pF2l)(float);
   int64_t (*pLdiv)(int64_t, int64_t);
-  int64_t (*pLdivmod)(int64_t, int64_t);
+  int64_t (*pLmod)(int64_t, int64_t);
   int64_t (*pLmul)(int64_t, int64_t);
   uint64_t (*pShlLong)(uint64_t, uint32_t);
   uint64_t (*pShrLong)(uint64_t, uint32_t);
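
Editorial note: this struct is a per-thread table of function pointers that compiled code indexes at fixed offsets, which is why the pLdivmod -> pLmod rename and the three new pAputObject slots are purely table edits from the backends' point of view. A standalone sketch of the dispatch mechanism, with stand-in types:

#include <cstdint>
#include <iostream>

// Hypothetical miniature of the table: one slot, one stub.
struct EntryPointsSketch {
  void (*pAputObject)(void*, uint32_t, void*);  // array, index, src
};

static void AputObjectStub(void* array, uint32_t index, void* src) {
  std::cout << "aput " << src << " -> " << array << "[" << index << "]\n";
}

int main() {
  EntryPointsSketch qpoints = { AputObjectStub };
  int dummy_array = 0, dummy_src = 0;
  // Compiled code performs exactly this kind of indirect call, loading the
  // slot from a fixed offset off the current Thread*.
  qpoints.pAputObject(&dummy_array, 2, &dummy_src);
  return 0;
}

The three pAputObject variants let the compiler choose how much checking (null, bounds) to inline at the call site and how much to leave to the stub.
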
diff --git a/runtime/entrypoints/quick/quick_lock_entrypoints.cc b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
index 36ca604..2102ab1 100644
--- a/runtime/entrypoints/quick/quick_lock_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
@@ -15,28 +15,40 @@
  */
 
 #include "callee_save_frame.h"
+#include "common_throws.h"
 #include "mirror/object-inl.h"
 
 namespace art {
 
-extern "C" int artUnlockObjectFromCode(mirror::Object* obj, Thread* self,
-                                       mirror::ArtMethod** sp)
-    UNLOCK_FUNCTION(monitor_lock_) {
+extern "C" int artLockObjectFromCode(mirror::Object* obj, Thread* self, mirror::ArtMethod** sp)
+    EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  DCHECK(obj != NULL);  // Assumed to have been checked before entry
-  // MonitorExit may throw exception
-  return obj->MonitorExit(self) ? 0 /* Success */ : -1 /* Failure */;
+  if (UNLIKELY(obj == NULL)) {
+    ThrowLocation throw_location(self->GetCurrentLocationForThrow());
+    ThrowNullPointerException(&throw_location,
+                              "Null reference used for synchronization (monitor-enter)");
+    return -1;  // Failure.
+  } else {
+    obj->MonitorEnter(self);  // May block.
+    DCHECK(self->HoldsLock(obj));
+    // The only possible exception is the NPE handled above.
+    DCHECK(!self->IsExceptionPending());
+    return 0;  // Success.
+  }
 }
 
-extern "C" void artLockObjectFromCode(mirror::Object* obj, Thread* thread,
-                                      mirror::ArtMethod** sp)
-    EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) {
-  FinishCalleeSaveFrameSetup(thread, sp, Runtime::kRefsOnly);
-  DCHECK(obj != NULL);        // Assumed to have been checked before entry
-  obj->MonitorEnter(thread);  // May block
-  DCHECK(thread->HoldsLock(obj));
-  // Only possible exception is NPE and is handled before entry
-  DCHECK(!thread->IsExceptionPending());
+extern "C" int artUnlockObjectFromCode(mirror::Object* obj, Thread* self, mirror::ArtMethod** sp)
+    UNLOCK_FUNCTION(monitor_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  if (UNLIKELY(obj == NULL)) {
+    ThrowLocation throw_location(self->GetCurrentLocationForThrow());
+    ThrowNullPointerException(&throw_location,
+                              "Null reference used for synchronization (monitor-exit)");
+    return -1;  // Failure.
+  } else {
+    // MonitorExit may throw exception.
+    return obj->MonitorExit(self) ? 0 /* Success */ : -1 /* Failure */;
+  }
 }
 
 }  // namespace art
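
Editorial note: lock and unlock now share one contract: 0 on success, -1 with the exception already pending, and the null check moves out of compiled code into the entrypoint. A hedged sketch of a caller under this contract (real callers are assembly stubs; names here are illustrative):

// Pseudo-code only: -1 means a NullPointerException is already pending, so
// the caller delivers it rather than raising anything new.
static void MonitorEnterFromCompiledCode(mirror::Object* obj, Thread* self,
                                         mirror::ArtMethod** sp) {
  if (UNLIKELY(artLockObjectFromCode(obj, self, sp) != 0)) {
    self->QuickDeliverException();  // Unwinds; does not return.
  }
}
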
diff --git a/runtime/entrypoints/quick/quick_math_entrypoints.cc b/runtime/entrypoints/quick/quick_math_entrypoints.cc
index 0bfe59d..014aad3 100644
--- a/runtime/entrypoints/quick/quick_math_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_math_entrypoints.cc
@@ -62,15 +62,15 @@
   return -1;
 }
 
-extern "C" int64_t artLmulFromCode(int64_t a, int64_t b) {
+extern "C" int64_t artLmul(int64_t a, int64_t b) {
   return a * b;
 }
 
-extern "C" int64_t artLdivFromCode(int64_t a, int64_t b) {
+extern "C" int64_t artLdiv(int64_t a, int64_t b) {
   return a / b;
 }
 
-extern "C" int64_t artLdivmodFromCode(int64_t a, int64_t b) {
+extern "C" int64_t artLmod(int64_t a, int64_t b) {
   return a % b;
 }
 
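
Editorial note: the renamed artLmod keeps C++'s truncated-division semantics for %, which coincide with Java's long remainder (JLS 15.17.3): the result takes the sign of the dividend. A quick standalone check:

#include <cstdint>
#include <iostream>

int main() {
  // C++11 pinned down % as truncating toward zero, matching Java's rem.
  std::cout << (int64_t(-7) % int64_t(3)) << "\n";  // -1
  std::cout << (int64_t(7) % int64_t(-3)) << "\n";  // 1
  std::cout << (int64_t(-7) / int64_t(3)) << "\n";  // -2 (truncated, not floored)
  return 0;
}
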
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index f67b2fc..31eacac 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -15,8 +15,9 @@
  */
 
 #include "callee_save_frame.h"
+#include "common_throws.h"
 #include "entrypoints/entrypoint_utils.h"
-#include "mirror/object.h"
+#include "mirror/object-inl.h"
 #include "object_utils.h"
 #include "thread.h"
 #include "well_known_classes.h"
@@ -95,4 +96,21 @@
   self->QuickDeliverException();
 }
 
+extern "C" void artThrowClassCastException(mirror::Class* dest_type, mirror::Class* src_type,
+                                           Thread* self, mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
+  CHECK(!dest_type->IsAssignableFrom(src_type));
+  ThrowClassCastException(dest_type, src_type);
+  self->QuickDeliverException();
+}
+
+extern "C" void artThrowArrayStoreException(mirror::Object* array, mirror::Object* value,
+                                            Thread* self, mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
+  ThrowArrayStoreException(value->GetClass(), array->GetClass());
+  self->QuickDeliverException();
+}
+
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index cb486d5..12291c3 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -423,13 +423,23 @@
 
   virtual void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (IsParamAReference()) {
-      soa_->AddLocalReference<jobject>(*reinterpret_cast<mirror::Object**>(GetParamAddress()));
+      mirror::Object** param_address = reinterpret_cast<mirror::Object**>(GetParamAddress());
+      jobject reference =
+          soa_->AddLocalReference<jobject>(*param_address);
+      references_.push_back(std::make_pair(reference, param_address));
+    }
+  }
+
+  void FixupReferences() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Fixup any references which may have changed.
+    for (std::pair<jobject, mirror::Object**>& it : references_) {
+      *it.second = soa_->Decode<mirror::Object*>(it.first);
     }
   }
 
  private:
   ScopedObjectAccessUnchecked* soa_;
-
+  std::vector<std::pair<jobject, mirror::Object**> > references_;
   DISALLOW_COPY_AND_ASSIGN(RememberFoGcArgumentVisitor);
 };
 
@@ -556,11 +566,8 @@
     }
   }
   CHECK_EQ(code == NULL, thread->IsExceptionPending());
-#ifdef MOVING_GARBAGE_COLLECTOR
-  // TODO: locally saved objects may have moved during a GC during resolution. Need to update the
-  //       registers so that the stale objects aren't passed to the method we've resolved.
-    UNIMPLEMENTED(WARNING);
-#endif
+  // Fixup any locally saved objects which may have moved during a GC.
+  visitor.FixupReferences();
   // Place called method in callee-save frame to be placed as first argument to quick method.
   *sp = called;
   return code;
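
Editorial note: the visitor change above is the heart of this fix. Each reference argument is registered as a local reference and its stack address remembered, so after resolution (which can allocate and therefore trigger GC) FixupReferences() rewrites each slot from its handle. A standalone sketch of the record-then-fixup idea, with a toy handle table standing in for the JNI local-reference machinery:

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

struct Object { int value; };
std::vector<Object*> handle_table;  // The collector updates this on a move.

size_t AddLocalReference(Object* obj) {
  handle_table.push_back(obj);
  return handle_table.size() - 1;
}

int main() {
  Object a{1};
  Object* slot = &a;  // A stack slot holding a raw object pointer.
  std::vector<std::pair<size_t, Object**>> references;
  references.emplace_back(AddLocalReference(slot), &slot);

  Object moved{1};
  handle_table[0] = &moved;  // "GC" moves the object; handles stay valid.

  // FixupReferences(): rewrite each recorded stack slot from its handle.
  for (auto& ref : references) {
    *ref.second = handle_table[ref.first];
  }
  std::cout << (slot == &moved) << "\n";  // 1: stale pointer repaired.
  return 0;
}
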
diff --git a/runtime/gc/accounting/mod_union_table-inl.h b/runtime/gc/accounting/mod_union_table-inl.h
index 29450c1..fb425df 100644
--- a/runtime/gc/accounting/mod_union_table-inl.h
+++ b/runtime/gc/accounting/mod_union_table-inl.h
@@ -28,9 +28,11 @@
 // A mod-union table to record image references to the Zygote and alloc space.
 class ModUnionTableToZygoteAllocspace : public ModUnionTableReferenceCache {
  public:
-  explicit ModUnionTableToZygoteAllocspace(Heap* heap) : ModUnionTableReferenceCache(heap) {}
+  explicit ModUnionTableToZygoteAllocspace(const std::string& name, Heap* heap,
+                                           space::ContinuousSpace* space)
+      : ModUnionTableReferenceCache(name, heap, space) {}
 
-  bool AddReference(const mirror::Object* /* obj */, const mirror::Object* ref) {
+  bool AddReference(const mirror::Object* /* obj */, const mirror::Object* ref) ALWAYS_INLINE {
     const std::vector<space::ContinuousSpace*>& spaces = GetHeap()->GetContinuousSpaces();
     typedef std::vector<space::ContinuousSpace*>::const_iterator It;
     for (It it = spaces.begin(); it != spaces.end(); ++it) {
@@ -47,16 +49,18 @@
 // A mod-union table to record Zygote references to the alloc space.
 class ModUnionTableToAllocspace : public ModUnionTableReferenceCache {
  public:
-  explicit ModUnionTableToAllocspace(Heap* heap) : ModUnionTableReferenceCache(heap) {}
+  explicit ModUnionTableToAllocspace(const std::string& name, Heap* heap,
+                                     space::ContinuousSpace* space)
+      : ModUnionTableReferenceCache(name, heap, space) {}
 
-  bool AddReference(const mirror::Object* /* obj */, const mirror::Object* ref) {
+  bool AddReference(const mirror::Object* /* obj */, const mirror::Object* ref) ALWAYS_INLINE {
     const std::vector<space::ContinuousSpace*>& spaces = GetHeap()->GetContinuousSpaces();
     typedef std::vector<space::ContinuousSpace*>::const_iterator It;
     for (It it = spaces.begin(); it != spaces.end(); ++it) {
       space::ContinuousSpace* space = *it;
       if (space->Contains(ref)) {
         // The allocation space is always considered for collection whereas the Zygote space is
-        //
+        // only considered for full GC.
         return space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect;
       }
     }
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 4865219..7cbe94d 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -19,6 +19,7 @@
 #include "base/stl_util.h"
 #include "card_table-inl.h"
 #include "heap_bitmap.h"
+#include "gc/collector/mark_sweep.h"
 #include "gc/collector/mark_sweep-inl.h"
 #include "gc/heap.h"
 #include "gc/space/space.h"
@@ -67,60 +68,87 @@
   std::vector<byte*>* const cleared_cards_;
 };
 
-class ModUnionScanImageRootVisitor {
+class ModUnionUpdateObjectReferencesVisitor {
  public:
-  explicit ModUnionScanImageRootVisitor(collector::MarkSweep* const mark_sweep)
-      : mark_sweep_(mark_sweep) {}
+  ModUnionUpdateObjectReferencesVisitor(RootVisitor visitor, void* arg)
+    : visitor_(visitor),
+      arg_(arg) {
+  }
 
-  void operator()(const Object* root) const
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(root != NULL);
-    mark_sweep_->ScanRoot(root);
+  // Extra parameters are required since we use this same visitor signature for checking objects.
+  void operator()(Object* obj, Object* ref, const MemberOffset& offset,
+                  bool /* is_static */) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Only update the reference if it is non null.
+    if (ref != nullptr) {
+      Object* new_ref = visitor_(ref, arg_);
+      if (new_ref != ref) {
+        obj->SetFieldObject(offset, new_ref, false, true);
+      }
+    }
   }
 
  private:
-  collector::MarkSweep* const mark_sweep_;
+  RootVisitor* visitor_;
+  void* arg_;
 };
 
-void ModUnionTableReferenceCache::ClearCards(space::ContinuousSpace* space) {
+class ModUnionScanImageRootVisitor {
+ public:
+  ModUnionScanImageRootVisitor(RootVisitor visitor, void* arg)
+      : visitor_(visitor), arg_(arg) {}
+
+  void operator()(Object* root) const
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(root != NULL);
+    ModUnionUpdateObjectReferencesVisitor ref_visitor(visitor_, arg_);
+    collector::MarkSweep::VisitObjectReferences(root, ref_visitor, true);
+  }
+
+ private:
+  RootVisitor* visitor_;
+  void* arg_;
+};
+
+void ModUnionTableReferenceCache::ClearCards() {
   CardTable* card_table = GetHeap()->GetCardTable();
   ModUnionClearCardSetVisitor visitor(&cleared_cards_);
   // Clear dirty cards in this space and update the corresponding mod-union bits.
-  card_table->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(), visitor);
+  card_table->ModifyCardsAtomic(space_->Begin(), space_->End(), AgeCardVisitor(), visitor);
 }
 
 class AddToReferenceArrayVisitor {
  public:
   explicit AddToReferenceArrayVisitor(ModUnionTableReferenceCache* mod_union_table,
-                                      std::vector<const Object*>* references)
+                                      std::vector<Object**>* references)
     : mod_union_table_(mod_union_table),
       references_(references) {
   }
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
-  void operator()(const Object* obj, const Object* ref, const MemberOffset& /* offset */,
+  void operator()(Object* obj, Object* ref, const MemberOffset& offset,
                   bool /* is_static */) const {
     // Only add the reference if it is non null and fits our criteria.
-    if (ref != NULL && mod_union_table_->AddReference(obj, ref)) {
-      references_->push_back(ref);
+    if (ref != nullptr && mod_union_table_->AddReference(obj, ref)) {
+      // Push the address of the reference.
+      references_->push_back(obj->GetFieldObjectAddr(offset));
     }
   }
 
  private:
   ModUnionTableReferenceCache* const mod_union_table_;
-  std::vector<const Object*>* const references_;
+  std::vector<Object**>* const references_;
 };
 
 class ModUnionReferenceVisitor {
  public:
   explicit ModUnionReferenceVisitor(ModUnionTableReferenceCache* const mod_union_table,
-                                    std::vector<const Object*>* references)
+                                    std::vector<Object**>* references)
     : mod_union_table_(mod_union_table),
       references_(references) {
   }
 
-  void operator()(const Object* obj) const
+  void operator()(Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
     DCHECK(obj != NULL);
     // We don't have an early exit since we use the visitor pattern, an early
@@ -130,7 +158,7 @@
   }
  private:
   ModUnionTableReferenceCache* const mod_union_table_;
-  std::vector<const Object*>* const references_;
+  std::vector<Object**>* const references_;
 };
 
 class CheckReferenceVisitor {
@@ -143,8 +171,8 @@
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
   // TODO: Fix me when annotalysis works with visitors.
-  void operator()(const Object* obj, const Object* ref, const MemberOffset& /* offset */,
-                  bool /* is_static */) const
+  void operator()(const Object* obj, const Object* ref,
+                  const MemberOffset& /* offset */, bool /* is_static */) const
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
     Heap* heap = mod_union_table_->GetHeap();
     if (ref != NULL && mod_union_table_->AddReference(obj, ref) &&
@@ -174,7 +202,7 @@
       : mod_union_table_(mod_union_table), references_(references) {
   }
 
-  void operator()(const Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+  void operator()(Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
     Locks::heap_bitmap_lock_->AssertSharedHeld(Thread::Current());
     DCHECK(obj != NULL);
     CheckReferenceVisitor visitor(mod_union_table_, references_);
@@ -188,26 +216,25 @@
 
 void ModUnionTableReferenceCache::Verify() {
   // Start by checking that everything in the mod union table is marked.
-  Heap* heap = GetHeap();
-  for (const std::pair<const byte*, std::vector<const Object*> >& it : references_) {
-    for (const Object* ref : it.second) {
-      CHECK(heap->IsLiveObjectLocked(ref));
+  for (const auto& ref_pair : references_) {
+    for (Object** ref : ref_pair.second) {
+      CHECK(heap_->IsLiveObjectLocked(*ref));
     }
   }
 
   // Check the references of each clean card which is also in the mod union table.
-  CardTable* card_table = heap->GetCardTable();
-  for (const std::pair<const byte*, std::vector<const Object*> > & it : references_) {
-    const byte* card = it.first;
+  CardTable* card_table = heap_->GetCardTable();
+  SpaceBitmap* live_bitmap = space_->GetLiveBitmap();
+  for (const auto& ref_pair : references_) {
+    const byte* card = ref_pair.first;
     if (*card == CardTable::kCardClean) {
-      std::set<const Object*> reference_set(it.second.begin(), it.second.end());
+      std::set<const Object*> reference_set;
+      for (Object** obj_ptr : ref_pair.second) {
+        reference_set.insert(*obj_ptr);
+      }
       ModUnionCheckReferences visitor(this, reference_set);
       uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card));
-      uintptr_t end = start + CardTable::kCardSize;
-      auto* space = heap->FindContinuousSpaceFromObject(reinterpret_cast<Object*>(start), false);
-      DCHECK(space != nullptr);
-      SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-      live_bitmap->VisitMarkedRange(start, end, visitor);
+      live_bitmap->VisitMarkedRange(start, start + CardTable::kCardSize, visitor);
     }
   }
 }
@@ -221,24 +248,24 @@
     os << reinterpret_cast<void*>(start) << "-" << reinterpret_cast<void*>(end) << ",";
   }
   os << "]\nModUnionTable references: [";
-  for (const std::pair<const byte*, std::vector<const Object*> >& it : references_) {
-    const byte* card_addr = it.first;
+  for (const auto& ref_pair : references_) {
+    const byte* card_addr = ref_pair.first;
     uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card_addr));
     uintptr_t end = start + CardTable::kCardSize;
     os << reinterpret_cast<void*>(start) << "-" << reinterpret_cast<void*>(end) << "->{";
-    for (const mirror::Object* ref : it.second) {
-      os << reinterpret_cast<const void*>(ref) << ",";
+    for (Object** ref : ref_pair.second) {
+      os << reinterpret_cast<const void*>(*ref) << ",";
     }
     os << "},";
   }
 }
 
-void ModUnionTableReferenceCache::Update() {
+void ModUnionTableReferenceCache::UpdateAndMarkReferences(RootVisitor visitor, void* arg) {
   Heap* heap = GetHeap();
   CardTable* card_table = heap->GetCardTable();
 
-  std::vector<const Object*> cards_references;
-  ModUnionReferenceVisitor visitor(this, &cards_references);
+  std::vector<Object**> cards_references;
+  ModUnionReferenceVisitor add_visitor(this, &cards_references);
 
   for (const auto& card : cleared_cards_) {
     // Clear and re-compute alloc space references associated with this card.
@@ -248,7 +275,7 @@
     auto* space = heap->FindContinuousSpaceFromObject(reinterpret_cast<Object*>(start), false);
     DCHECK(space != nullptr);
     SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-    live_bitmap->VisitMarkedRange(start, end, visitor);
+    live_bitmap->VisitMarkedRange(start, end, add_visitor);
 
     // Update the corresponding references for the card.
     auto found = references_.find(card);
@@ -263,46 +290,41 @@
     }
   }
   cleared_cards_.clear();
-}
-
-void ModUnionTableReferenceCache::MarkReferences(collector::MarkSweep* mark_sweep) {
   size_t count = 0;
-
   for (const auto& ref : references_) {
-    for (const auto& obj : ref.second) {
-      mark_sweep->MarkRoot(obj);
-      ++count;
+    for (const auto& obj_ptr : ref.second) {
+      Object* obj = *obj_ptr;
+      if (obj != nullptr) {
+        Object* new_obj = visitor(obj, arg);
+        // Avoid dirtying pages in the image unless necessary.
+        if (new_obj != obj) {
+          *obj_ptr = new_obj;
+        }
+      }
     }
+    count += ref.second.size();
   }
   if (VLOG_IS_ON(heap)) {
     VLOG(gc) << "Marked " << count << " references in mod union table";
   }
 }
 
-void ModUnionTableCardCache::ClearCards(space::ContinuousSpace* space) {
+void ModUnionTableCardCache::ClearCards() {
   CardTable* card_table = GetHeap()->GetCardTable();
   ModUnionClearCardSetVisitor visitor(&cleared_cards_);
   // Clear dirty cards in this space and update the corresponding mod-union bits.
-  card_table->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(), visitor);
+  card_table->ModifyCardsAtomic(space_->Begin(), space_->End(), AgeCardVisitor(), visitor);
 }
 
 // Mark all references to the alloc space(s).
-void ModUnionTableCardCache::MarkReferences(collector::MarkSweep* mark_sweep) {
+void ModUnionTableCardCache::UpdateAndMarkReferences(RootVisitor visitor, void* arg) {
   CardTable* card_table = heap_->GetCardTable();
-  ModUnionScanImageRootVisitor visitor(mark_sweep);
-  space::ContinuousSpace* space = nullptr;
-  SpaceBitmap* bitmap = nullptr;
+  ModUnionScanImageRootVisitor scan_visitor(visitor, arg);
+  SpaceBitmap* bitmap = space_->GetLiveBitmap();
   for (const byte* card_addr : cleared_cards_) {
-    auto start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card_addr));
-    auto end = start + CardTable::kCardSize;
-    auto obj_start = reinterpret_cast<Object*>(start);
-    if (UNLIKELY(space == nullptr || !space->Contains(obj_start))) {
-      space = heap_->FindContinuousSpaceFromObject(obj_start, false);
-      DCHECK(space != nullptr);
-      bitmap = space->GetLiveBitmap();
-      DCHECK(bitmap != nullptr);
-    }
-    bitmap->VisitMarkedRange(start, end, visitor);
+    uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card_addr));
+    DCHECK(space_->HasAddress(reinterpret_cast<Object*>(start)));
+    bitmap->VisitMarkedRange(start, start + CardTable::kCardSize, scan_visitor);
   }
 }
 
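
Editorial note: both update paths above end with the same idiom: call the visitor, and store back only when the returned pointer differs, because an unconditional store would dirty (and, for copy-on-write image pages, un-share) the page even when nothing moved. The idiom in isolation:

#include <iostream>

// Standalone illustration of the compare-before-store pattern.
template <typename T, typename Visitor>
void UpdateSlot(T** slot, Visitor visitor) {
  T* old_ref = *slot;
  if (old_ref != nullptr) {
    T* new_ref = visitor(old_ref);
    if (new_ref != old_ref) {  // Only write, and thus dirty the page, on change.
      *slot = new_ref;
    }
  }
}

int main() {
  int x = 0;
  int* p = &x;
  UpdateSlot(&p, [](int* ref) { return ref; });  // Non-moving GC: no store.
  std::cout << (p == &x) << "\n";  // 1
  return 0;
}
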
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index eb7a754..d874c60 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -19,6 +19,7 @@
 
 #include "gc_allocator.h"
 #include "globals.h"
+#include "root_visitor.h"
 #include "safe_map.h"
 
 #include <set>
@@ -52,21 +53,23 @@
  public:
   typedef std::set<byte*, std::less<byte*>, GCAllocator<byte*> > CardSet;
 
-  explicit ModUnionTable(Heap* heap) : heap_(heap) {}
+  explicit ModUnionTable(const std::string& name, Heap* heap, space::ContinuousSpace* space)
+      : name_(name),
+        heap_(heap),
+        space_(space) {
+  }
 
   virtual ~ModUnionTable() {}
 
   // Clear cards which map to a memory range of a space. This doesn't immediately update the
   // mod-union table, as updating the mod-union table may have an associated cost, such as
   // determining references to track.
-  virtual void ClearCards(space::ContinuousSpace* space) = 0;
+  virtual void ClearCards() = 0;
 
   // Update the mod-union table using data stored by ClearCards. There may be multiple ClearCards
-  // before a call to update, for example, back-to-back sticky GCs.
-  virtual void Update() = 0;
-
-  // Mark the bitmaps for all references which are stored in the mod-union table.
-  virtual void MarkReferences(collector::MarkSweep* mark_sweep) = 0;
+  // before a call to update, for example, back-to-back sticky GCs. Also mark references to other
+  // spaces which are stored in the mod-union table.
+  virtual void UpdateAndMarkReferences(RootVisitor visitor, void* arg) = 0;
 
   // Verification, sanity checks that we don't have clean cards which conflict with our cached data
   // for said cards. Exclusive lock is required since verify sometimes uses
@@ -75,31 +78,35 @@
   virtual void Verify() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) = 0;
 
   virtual void Dump(std::ostream& os) = 0;
-
+  space::ContinuousSpace* GetSpace() {
+    return space_;
+  }
   Heap* GetHeap() const {
     return heap_;
   }
+  const std::string& GetName() const {
+    return name_;
+  }
 
  protected:
+  const std::string name_;
   Heap* const heap_;
+  space::ContinuousSpace* const space_;
 };
 
 // Reference caching implementation. Caches references pointing to alloc space(s) for each card.
 class ModUnionTableReferenceCache : public ModUnionTable {
  public:
-  explicit ModUnionTableReferenceCache(Heap* heap) : ModUnionTable(heap) {}
+  explicit ModUnionTableReferenceCache(const std::string& name, Heap* heap,
+                                       space::ContinuousSpace* space)
+      : ModUnionTable(name, heap, space) {}
   virtual ~ModUnionTableReferenceCache() {}
 
   // Clear and store cards for a space.
-  void ClearCards(space::ContinuousSpace* space);
+  void ClearCards();
 
-  // Update table based on cleared cards.
-  void Update()
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  // Mark all references to the alloc space(s).
-  void MarkReferences(collector::MarkSweep* mark_sweep)
+  // Update table based on cleared cards and mark all references to the other spaces.
+  void UpdateAndMarkReferences(RootVisitor visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -117,24 +124,22 @@
   ModUnionTable::CardSet cleared_cards_;
 
   // Maps from dirty cards to their corresponding alloc space references.
-  SafeMap<const byte*, std::vector<const mirror::Object*>, std::less<const byte*>,
-    GCAllocator<std::pair<const byte*, std::vector<const mirror::Object*> > > > references_;
+  SafeMap<const byte*, std::vector<mirror::Object**>, std::less<const byte*>,
+    GCAllocator<std::pair<const byte*, std::vector<mirror::Object**> > > > references_;
 };
 
 // Card caching implementation. Keeps track of which cards we cleared and only this information.
 class ModUnionTableCardCache : public ModUnionTable {
  public:
-  explicit ModUnionTableCardCache(Heap* heap) : ModUnionTable(heap) {}
+  explicit ModUnionTableCardCache(const std::string& name, Heap* heap, space::ContinuousSpace* space)
+      : ModUnionTable(name, heap, space) {}
   virtual ~ModUnionTableCardCache() {}
 
   // Clear and store cards for a space.
-  void ClearCards(space::ContinuousSpace* space);
-
-  // Nothing to update as all dirty cards were placed into cleared cards during clearing.
-  void Update() {}
+  void ClearCards();
 
   // Mark all references to the alloc space(s).
-  void MarkReferences(collector::MarkSweep* mark_sweep)
+  void UpdateAndMarkReferences(RootVisitor visitor, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
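
Editorial note: binding each table to one space (plus a name for the timing logs) is what lets ClearCards() and UpdateAndMarkReferences() drop their parameters; the heap presumably keeps one table per immune space and retrieves it via FindModUnionTableFromSpace(), as the mark_sweep.cc hunk further below shows. A hypothetical sketch of that construction site (which would live in heap.cc, outside this diff):

// Sketch only: one named table per immune space, keyed for later lookup.
accounting::ModUnionTable* table = new accounting::ModUnionTableToZygoteAllocspace(
    "ImageModUnionTable", this, image_space);
mod_union_tables_.Put(image_space, table);
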
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index f975692..4cf8872 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -247,8 +247,8 @@
 
   template <typename Visitor>
   void Visit(const Visitor& visitor) NO_THREAD_SAFETY_ANALYSIS {
-    for (Objects::iterator it = contained_.begin(); it != contained_.end(); ++it) {
-      visitor(*it);
+    for (const mirror::Object* obj : contained_) {
+      visitor(const_cast<mirror::Object*>(obj));
     }
   }
 
diff --git a/runtime/gc/allocator/dlmalloc.cc b/runtime/gc/allocator/dlmalloc.cc
index 3cc64e9..a6a3ee7 100644
--- a/runtime/gc/allocator/dlmalloc.cc
+++ b/runtime/gc/allocator/dlmalloc.cc
@@ -69,3 +69,19 @@
     *reclaimed += length;
   }
 }
+
+extern "C" void DlmallocBytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
+  if (used_bytes == 0) {
+    return;
+  }
+  size_t* bytes_allocated = reinterpret_cast<size_t*>(arg);
+  *bytes_allocated += used_bytes + sizeof(size_t);
+}
+
+extern "C" void DlmallocObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
+  if (used_bytes == 0) {
+    return;
+  }
+  size_t* objects_allocated = reinterpret_cast<size_t*>(arg);
+  ++(*objects_allocated);
+}
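
Editorial note: these callbacks are shaped for dlmalloc's inspect-all hook, which walks every allocated region and invokes the handler with its used byte count; the extra sizeof(size_t) per region presumably accounts for the allocator's per-chunk bookkeeping overhead. A hedged usage sketch, assuming an mspace handle msp as used elsewhere in the GC's dlmalloc space code:

size_t bytes_allocated = 0;
size_t objects_allocated = 0;
mspace_inspect_all(msp, DlmallocBytesAllocatedCallback, &bytes_allocated);
mspace_inspect_all(msp, DlmallocObjectsAllocatedCallback, &objects_allocated);
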
diff --git a/runtime/gc/allocator/dlmalloc.h b/runtime/gc/allocator/dlmalloc.h
index 07ebd1c..c820b19 100644
--- a/runtime/gc/allocator/dlmalloc.h
+++ b/runtime/gc/allocator/dlmalloc.h
@@ -18,6 +18,8 @@
 #define ART_RUNTIME_GC_ALLOCATOR_DLMALLOC_H_
 
 // Configure dlmalloc for mspaces.
+// Avoid a collision with one used in llvm.
+#undef HAVE_MMAP
 #define HAVE_MMAP 0
 #define HAVE_MREMAP 0
 #define HAVE_MORECORE 1
@@ -37,4 +39,10 @@
 // pages back to the kernel.
 extern "C" void DlmallocMadviseCallback(void* start, void* end, size_t used_bytes, void* /*arg*/);
 
+// Callbacks for dlmalloc_inspect_all or mspace_inspect_all that will
+// count the number of bytes allocated and objects allocated,
+// respectively.
+extern "C" void DlmallocBytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
+extern "C" void DlmallocObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
+
 #endif  // ART_RUNTIME_GC_ALLOCATOR_DLMALLOC_H_
diff --git a/runtime/gc/collector/mark_sweep-inl.h b/runtime/gc/collector/mark_sweep-inl.h
index d0b0b5c..270c9ef 100644
--- a/runtime/gc/collector/mark_sweep-inl.h
+++ b/runtime/gc/collector/mark_sweep-inl.h
@@ -29,7 +29,7 @@
 namespace collector {
 
 template <typename MarkVisitor>
-inline void MarkSweep::ScanObjectVisit(const mirror::Object* obj, const MarkVisitor& visitor) {
+inline void MarkSweep::ScanObjectVisit(mirror::Object* obj, const MarkVisitor& visitor) {
   DCHECK(obj != NULL);
   if (kIsDebugBuild && !IsMarked(obj)) {
     heap_->DumpSpaces();
@@ -62,7 +62,8 @@
 }
 
 template <typename Visitor>
-inline void MarkSweep::VisitObjectReferences(const mirror::Object* obj, const Visitor& visitor)
+inline void MarkSweep::VisitObjectReferences(mirror::Object* obj, const Visitor& visitor,
+                                             bool visit_class)
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
                           Locks::mutator_lock_) {
   DCHECK(obj != NULL);
@@ -70,6 +71,9 @@
 
   mirror::Class* klass = obj->GetClass();
   DCHECK(klass != NULL);
+  if (visit_class) {
+    visitor(obj, klass, MemberOffset(0), false);
+  }
   if (klass == mirror::Class::GetJavaLangClass()) {
     DCHECK_EQ(klass->GetClass(), mirror::Class::GetJavaLangClass());
     VisitClassReferences(klass, obj, visitor);
@@ -86,8 +90,8 @@
 }
 
 template <typename Visitor>
-inline void MarkSweep::VisitInstanceFieldsReferences(const mirror::Class* klass,
-                                                     const mirror::Object* obj,
+inline void MarkSweep::VisitInstanceFieldsReferences(mirror::Class* klass,
+                                                     mirror::Object* obj,
                                                      const Visitor& visitor)
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
   DCHECK(obj != NULL);
@@ -96,7 +100,7 @@
 }
 
 template <typename Visitor>
-inline void MarkSweep::VisitClassReferences(const mirror::Class* klass, const mirror::Object* obj,
+inline void MarkSweep::VisitClassReferences(mirror::Class* klass, mirror::Object* obj,
                                             const Visitor& visitor)
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
   VisitInstanceFieldsReferences(klass, obj, visitor);
@@ -104,15 +108,14 @@
 }
 
 template <typename Visitor>
-inline void MarkSweep::VisitStaticFieldsReferences(const mirror::Class* klass,
-                                                   const Visitor& visitor)
+inline void MarkSweep::VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor)
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
   DCHECK(klass != NULL);
   VisitFieldsReferences(klass, klass->GetReferenceStaticOffsets(), true, visitor);
 }
 
 template <typename Visitor>
-inline void MarkSweep::VisitFieldsReferences(const mirror::Object* obj, uint32_t ref_offsets,
+inline void MarkSweep::VisitFieldsReferences(mirror::Object* obj, uint32_t ref_offsets,
                                              bool is_static, const Visitor& visitor) {
   if (LIKELY(ref_offsets != CLASS_WALK_SUPER)) {
     // Found a reference offset bitmap.  Mark the specified offsets.
@@ -124,7 +127,7 @@
     while (ref_offsets != 0) {
       size_t right_shift = CLZ(ref_offsets);
       MemberOffset field_offset = CLASS_OFFSET_FROM_CLZ(right_shift);
-      const mirror::Object* ref = obj->GetFieldObject<const mirror::Object*>(field_offset, false);
+      mirror::Object* ref = obj->GetFieldObject<mirror::Object*>(field_offset, false);
       visitor(obj, ref, field_offset, is_static);
       ref_offsets &= ~(CLASS_HIGH_BIT >> right_shift);
     }
@@ -143,7 +146,7 @@
         mirror::ArtField* field = (is_static ? klass->GetStaticField(i)
                                    : klass->GetInstanceField(i));
         MemberOffset field_offset = field->GetOffset();
-        const mirror::Object* ref = obj->GetFieldObject<const mirror::Object*>(field_offset, false);
+        mirror::Object* ref = obj->GetFieldObject<mirror::Object*>(field_offset, false);
         visitor(obj, ref, field_offset, is_static);
       }
     }
@@ -151,11 +154,11 @@
 }
 
 template <typename Visitor>
-inline void MarkSweep::VisitObjectArrayReferences(const mirror::ObjectArray<mirror::Object>* array,
+inline void MarkSweep::VisitObjectArrayReferences(mirror::ObjectArray<mirror::Object>* array,
                                                   const Visitor& visitor) {
   const size_t length = static_cast<size_t>(array->GetLength());
   for (size_t i = 0; i < length; ++i) {
-    const mirror::Object* element = array->GetWithoutChecks(static_cast<int32_t>(i));
+    mirror::Object* element = array->GetWithoutChecks(static_cast<int32_t>(i));
     const size_t width = sizeof(mirror::Object*);
     MemberOffset offset(i * width + mirror::Array::DataOffset(width).Int32Value());
     visitor(array, element, offset, false);
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 6790144..a5e66d2 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -28,6 +28,7 @@
 #include "base/timing_logger.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap.h"
+#include "gc/accounting/mod_union_table.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
 #include "gc/space/image_space.h"
@@ -99,7 +100,7 @@
   } else {
     const space::ContinuousSpace* prev_space = nullptr;
     // Find out if the previous space is immune.
-    for (space::ContinuousSpace* cur_space : GetHeap()->GetContinuousSpaces()) {
+    for (const space::ContinuousSpace* cur_space : GetHeap()->GetContinuousSpaces()) {
       if (cur_space == space) {
         break;
       }
@@ -107,15 +108,19 @@
     }
     // If previous space was immune, then extend the immune region. Relies on continuous spaces
     // being sorted by Heap::AddContinuousSpace.
-    if (prev_space != NULL &&
-        immune_begin_ <= reinterpret_cast<Object*>(prev_space->Begin()) &&
-        immune_end_ >= reinterpret_cast<Object*>(prev_space->End())) {
+    if (prev_space != NULL && IsImmuneSpace(prev_space)) {
       immune_begin_ = std::min(reinterpret_cast<Object*>(space->Begin()), immune_begin_);
       immune_end_ = std::max(reinterpret_cast<Object*>(space->End()), immune_end_);
     }
   }
 }
 
+bool MarkSweep::IsImmuneSpace(const space::ContinuousSpace* space) {
+  return
+      immune_begin_ <= reinterpret_cast<Object*>(space->Begin()) &&
+      immune_end_ >= reinterpret_cast<Object*>(space->End());
+}
+
 void MarkSweep::BindBitmaps() {
   timings_.StartSplit("BindBitmaps");
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
@@ -263,11 +268,23 @@
   }
   live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
   MarkConcurrentRoots();
-
-  heap_->UpdateAndMarkModUnion(this, timings_, GetGcType());
+  UpdateAndMarkModUnion();
   MarkReachableObjects();
 }
 
+void MarkSweep::UpdateAndMarkModUnion() {
+  for (const auto& space : heap_->GetContinuousSpaces()) {
+    if (IsImmuneSpace(space)) {
+      const char* name = space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
+          "UpdateAndMarkImageModUnionTable";
+      base::TimingLogger::ScopedSplit split(name, &timings_);
+      accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
+      CHECK(mod_union_table != nullptr);
+      mod_union_table->UpdateAndMarkReferences(MarkRootCallback, this);
+    }
+  }
+}
+
 void MarkSweep::MarkThreadRoots(Thread* self) {
   MarkRootsCheckpoint(self);
 }
@@ -519,24 +536,18 @@
   }
 }
 
-void MarkSweep::MarkRootParallelCallback(const Object* root, void* arg) {
+Object* MarkSweep::MarkRootParallelCallback(Object* root, void* arg) {
   DCHECK(root != NULL);
   DCHECK(arg != NULL);
   reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNullParallel(root);
+  return root;
 }
 
-void MarkSweep::MarkObjectCallback(const Object* root, void* arg) {
-  DCHECK(root != NULL);
-  DCHECK(arg != NULL);
-  MarkSweep* mark_sweep = reinterpret_cast<MarkSweep*>(arg);
-  mark_sweep->MarkObjectNonNull(root);
-}
-
-void MarkSweep::ReMarkObjectVisitor(const Object* root, void* arg) {
-  DCHECK(root != NULL);
-  DCHECK(arg != NULL);
-  MarkSweep* mark_sweep = reinterpret_cast<MarkSweep*>(arg);
-  mark_sweep->MarkObjectNonNull(root);
+Object* MarkSweep::MarkRootCallback(Object* root, void* arg) {
+  DCHECK(root != nullptr);
+  DCHECK(arg != nullptr);
+  reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNull(root);
+  return root;
 }
 
 void MarkSweep::VerifyRootCallback(const Object* root, void* arg, size_t vreg,
@@ -564,30 +575,30 @@
 // Marks all objects in the root set.
 void MarkSweep::MarkRoots() {
   timings_.StartSplit("MarkRoots");
-  Runtime::Current()->VisitNonConcurrentRoots(MarkObjectCallback, this);
+  Runtime::Current()->VisitNonConcurrentRoots(MarkRootCallback, this);
   timings_.EndSplit();
 }
 
 void MarkSweep::MarkNonThreadRoots() {
   timings_.StartSplit("MarkNonThreadRoots");
-  Runtime::Current()->VisitNonThreadRoots(MarkObjectCallback, this);
+  Runtime::Current()->VisitNonThreadRoots(MarkRootCallback, this);
   timings_.EndSplit();
 }
 
 void MarkSweep::MarkConcurrentRoots() {
   timings_.StartSplit("MarkConcurrentRoots");
   // Visit all runtime roots and clear dirty flags.
-  Runtime::Current()->VisitConcurrentRoots(MarkObjectCallback, this, false, true);
+  Runtime::Current()->VisitConcurrentRoots(MarkRootCallback, this, false, true);
   timings_.EndSplit();
 }
 
 void MarkSweep::CheckObject(const Object* obj) {
   DCHECK(obj != NULL);
-  VisitObjectReferences(obj, [this](const Object* obj, const Object* ref, MemberOffset offset,
-      bool is_static) NO_THREAD_SAFETY_ANALYSIS {
+  VisitObjectReferences(const_cast<Object*>(obj), [this](const Object* obj, const Object* ref,
+      MemberOffset offset, bool is_static) NO_THREAD_SAFETY_ANALYSIS {
     Locks::heap_bitmap_lock_->AssertSharedHeld(Thread::Current());
     CheckReference(obj, ref, offset, is_static);
-  });
+  }, true);
 }
 
 void MarkSweep::VerifyImageRootVisitor(Object* root, void* arg) {
@@ -653,11 +664,11 @@
     explicit ScanObjectParallelVisitor(MarkStackTask<kUseFinger>* chunk_task) ALWAYS_INLINE
         : chunk_task_(chunk_task) {}
 
-    void operator()(const Object* obj) const {
+    void operator()(Object* obj) const {
       MarkSweep* mark_sweep = chunk_task_->mark_sweep_;
       mark_sweep->ScanObjectVisit(obj,
-          [mark_sweep, this](const Object* /* obj */, const Object* ref,
-              const MemberOffset& /* offset */, bool /* is_static */) ALWAYS_INLINE {
+          [mark_sweep, this](Object* /* obj */, Object* ref, const MemberOffset& /* offset */,
+              bool /* is_static */) ALWAYS_INLINE {
         if (ref != nullptr && mark_sweep->MarkObjectParallel(ref)) {
           if (kUseFinger) {
             android_memory_barrier();
@@ -714,11 +725,11 @@
     static const size_t kFifoSize = 4;
     BoundedFifoPowerOfTwo<const Object*, kFifoSize> prefetch_fifo;
     for (;;) {
-      const Object* obj = NULL;
+      const Object* obj = nullptr;
       if (kUseMarkStackPrefetch) {
         while (mark_stack_pos_ != 0 && prefetch_fifo.size() < kFifoSize) {
           const Object* obj = mark_stack_[--mark_stack_pos_];
-          DCHECK(obj != NULL);
+          DCHECK(obj != nullptr);
           __builtin_prefetch(obj);
           prefetch_fifo.push_back(obj);
         }
@@ -733,8 +744,8 @@
         }
         obj = mark_stack_[--mark_stack_pos_];
       }
-      DCHECK(obj != NULL);
-      visitor(obj);
+      DCHECK(obj != nullptr);
+      visitor(const_cast<mirror::Object*>(obj));
     }
   }
 };
@@ -990,8 +1001,11 @@
   ProcessMarkStack(false);
 }
 
-bool MarkSweep::IsMarkedCallback(const Object* object, void* arg) {
-  return reinterpret_cast<MarkSweep*>(arg)->IsMarked(object);
+mirror::Object* MarkSweep::SystemWeakIsMarkedCallback(Object* object, void* arg) {
+  if (reinterpret_cast<MarkSweep*>(arg)->IsMarked(object)) {
+    return object;
+  }
+  return nullptr;
 }
 
 void MarkSweep::RecursiveMarkDirtyObjects(bool paused, byte minimum_age) {
@@ -1001,45 +1015,21 @@
 
 void MarkSweep::ReMarkRoots() {
   timings_.StartSplit("ReMarkRoots");
-  Runtime::Current()->VisitRoots(ReMarkObjectVisitor, this, true, true);
+  Runtime::Current()->VisitRoots(MarkRootCallback, this, true, true);
   timings_.EndSplit();
 }
 
-void MarkSweep::SweepJniWeakGlobals(IsMarkedTester is_marked, void* arg) {
-  Runtime::Current()->GetJavaVM()->SweepWeakGlobals(is_marked, arg);
-}
-
-struct ArrayMarkedCheck {
-  accounting::ObjectStack* live_stack;
-  MarkSweep* mark_sweep;
-};
-
-// Either marked or not live.
-bool MarkSweep::IsMarkedArrayCallback(const Object* object, void* arg) {
-  ArrayMarkedCheck* array_check = reinterpret_cast<ArrayMarkedCheck*>(arg);
-  if (array_check->mark_sweep->IsMarked(object)) {
-    return true;
-  }
-  accounting::ObjectStack* live_stack = array_check->live_stack;
-  if (std::find(live_stack->Begin(), live_stack->End(), object) == live_stack->End()) {
-    return true;
-  }
-  return false;
-}
-
 void MarkSweep::SweepSystemWeaks() {
   Runtime* runtime = Runtime::Current();
   timings_.StartSplit("SweepSystemWeaks");
-  runtime->GetInternTable()->SweepInternTableWeaks(IsMarkedCallback, this);
-  runtime->GetMonitorList()->SweepMonitorList(IsMarkedCallback, this);
-  SweepJniWeakGlobals(IsMarkedCallback, this);
+  runtime->SweepSystemWeaks(SystemWeakIsMarkedCallback, this);
   timings_.EndSplit();
 }
 
-bool MarkSweep::VerifyIsLiveCallback(const Object* obj, void* arg) {
+mirror::Object* MarkSweep::VerifySystemWeakIsLiveCallback(Object* obj, void* arg) {
   reinterpret_cast<MarkSweep*>(arg)->VerifyIsLive(obj);
   // We don't actually want to sweep the object, so let's return "marked".
-  return true;
+  return obj;
 }
 
 void MarkSweep::VerifyIsLive(const Object* obj) {
@@ -1058,11 +1048,8 @@
 }
 
 void MarkSweep::VerifySystemWeaks() {
-  Runtime* runtime = Runtime::Current();
-  // Verify system weaks, uses a special IsMarked callback which always returns true.
-  runtime->GetInternTable()->SweepInternTableWeaks(VerifyIsLiveCallback, this);
-  runtime->GetMonitorList()->SweepMonitorList(VerifyIsLiveCallback, this);
-  runtime->GetJavaVM()->SweepWeakGlobals(VerifyIsLiveCallback, this);
+  // Verify system weaks using a special object visitor which returns the input object.
+  Runtime::Current()->SweepSystemWeaks(VerifySystemWeakIsLiveCallback, this);
 }
 
 struct SweepCallbackContext {
@@ -1396,7 +1383,7 @@
 // and dispatches to a specialized scanning routine.
 void MarkSweep::ScanObject(const Object* obj) {
   MarkObjectVisitor visitor(this);
-  ScanObjectVisit(obj, visitor);
+  ScanObjectVisit(const_cast<Object*>(obj), visitor);
 }
 
 void MarkSweep::ProcessMarkStackParallel(size_t thread_count) {
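
Editorial note: the recurring theme of this file's changes is that root and weak callbacks now return an object instead of void or bool. For a non-moving mark-sweep the callback simply returns its argument (or nullptr for a dead weak), but the signature leaves room for a moving collector to hand back a forwarded address that the caller writes in place, mirroring the mod-union table update above. A standalone sketch of the protocol:

#include <iostream>

struct Object { int v; };

// RootVisitor protocol as used above: return the (possibly relocated) object.
using RootVisitor = Object* (*)(Object* root, void* arg);

Object* NonMovingMark(Object* root, void* /*arg*/) {
  return root;  // Mark-sweep never moves objects, so the root is unchanged.
}

void VisitRoot(Object** slot, RootVisitor visitor, void* arg) {
  Object* old_root = *slot;
  Object* new_root = visitor(old_root, arg);
  if (new_root != old_root) {
    *slot = new_root;  // A moving collector would take this branch.
  }
}

int main() {
  Object o{42};
  Object* root = &o;
  VisitRoot(&root, NonMovingMark, nullptr);
  std::cout << root->v << "\n";  // 42
  return 0;
}
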
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index feef992..19df2da 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -114,6 +114,9 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  bool IsImmuneSpace(const space::ContinuousSpace* space)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Bind the live bits to the mark bits of bitmaps for spaces that are never collected, ie
   // the image. Mark that portion of the heap as immune.
   virtual void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -137,6 +140,9 @@
   void ProcessReferences(Thread* self)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  virtual void UpdateAndMarkModUnion()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Sweeps unmarked objects to complete the garbage collection.
   virtual void Sweep(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -163,7 +169,7 @@
 
   // TODO: enable thread safety analysis when in use by multiple worker threads.
   template <typename MarkVisitor>
-  void ScanObjectVisit(const mirror::Object* obj, const MarkVisitor& visitor)
+  void ScanObjectVisit(mirror::Object* obj, const MarkVisitor& visitor)
       NO_THREAD_SAFETY_ANALYSIS;
 
   size_t GetFreedBytes() const {
@@ -204,7 +210,7 @@
   void SweepSystemWeaks()
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static bool VerifyIsLiveCallback(const mirror::Object* obj, void* arg)
+  static mirror::Object* VerifySystemWeakIsLiveCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   void VerifySystemWeaks()
@@ -215,15 +221,16 @@
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   template <typename Visitor>
-  static void VisitObjectReferences(const mirror::Object* obj, const Visitor& visitor)
+  static void VisitObjectReferences(mirror::Object* obj, const Visitor& visitor,
+                                    bool visit_class = false)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
                             Locks::mutator_lock_);
 
-  static void MarkObjectCallback(const mirror::Object* root, void* arg)
+  static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void MarkRootParallelCallback(const mirror::Object* root, void* arg);
+  static mirror::Object* MarkRootParallelCallback(mirror::Object* root, void* arg);
 
   // Marks an object.
   void MarkObject(const mirror::Object* obj)
@@ -242,16 +249,12 @@
   // Returns true if the object has its bit set in the mark bitmap.
   bool IsMarked(const mirror::Object* object) const;
 
-  static bool IsMarkedCallback(const mirror::Object* object, void* arg)
+  static mirror::Object* SystemWeakIsMarkedCallback(mirror::Object* object, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static bool IsMarkedArrayCallback(const mirror::Object* object, void* arg)
+  static mirror::Object* SystemWeakIsMarkedArrayCallback(mirror::Object* object, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void ReMarkObjectVisitor(const mirror::Object* root, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   static void VerifyImageRootVisitor(mirror::Object* root, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
                             Locks::mutator_lock_);
@@ -310,7 +313,7 @@
   size_t GetThreadCount(bool paused) const;
 
   // Returns true if an object is inside of the immune region (assumed to be marked).
-  bool IsImmune(const mirror::Object* obj) const {
+  bool IsImmune(const mirror::Object* obj) const ALWAYS_INLINE {
     return obj >= immune_begin_ && obj < immune_end_;
   }
 
@@ -321,34 +324,34 @@
       NO_THREAD_SAFETY_ANALYSIS;
 
   template <typename Visitor>
-  static void VisitInstanceFieldsReferences(const mirror::Class* klass, const mirror::Object* obj,
+  static void VisitInstanceFieldsReferences(mirror::Class* klass, mirror::Object* obj,
                                             const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   // Visit the header, static field references, and interface pointers of a class object.
   template <typename Visitor>
-  static void VisitClassReferences(const mirror::Class* klass, const mirror::Object* obj,
+  static void VisitClassReferences(mirror::Class* klass, mirror::Object* obj,
                                    const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   template <typename Visitor>
-  static void VisitStaticFieldsReferences(const mirror::Class* klass, const Visitor& visitor)
+  static void VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   template <typename Visitor>
-  static void VisitFieldsReferences(const mirror::Object* obj, uint32_t ref_offsets, bool is_static,
+  static void VisitFieldsReferences(mirror::Object* obj, uint32_t ref_offsets, bool is_static,
                                     const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   // Visit all of the references in an object array.
   template <typename Visitor>
-  static void VisitObjectArrayReferences(const mirror::ObjectArray<mirror::Object>* array,
+  static void VisitObjectArrayReferences(mirror::ObjectArray<mirror::Object>* array,
                                          const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   // Visits the header and field references of a data object.
   template <typename Visitor>
-  static void VisitOtherReferences(const mirror::Class* klass, const mirror::Object* obj,
+  static void VisitOtherReferences(mirror::Class* klass, mirror::Object* obj,
                                    const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
     return VisitInstanceFieldsReferences(klass, obj, visitor);
@@ -390,9 +393,6 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SweepJniWeakGlobals(IsMarkedTester is_marked, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   // Whether or not we count how many of each type of object were scanned.
   static const bool kCountScannedTypes = false;
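
The signature change running through this patch — root callbacks returning mirror::Object* instead of void — lets a visitor hand back a relocated root, which the caller then stores in place; this is the shape a moving collector needs. A minimal standalone sketch of that contract, with all types and names illustrative rather than ART's:

#include <cstddef>
#include <iostream>

struct Object { int payload; };

// Hypothetical model of the updated root-visitor contract: the callback
// returns the (possibly moved) object, and the caller writes it back.
typedef Object* (*RootVisitor)(Object* root, void* arg);

static Object* MoveRootCallback(Object* root, void* arg) {
  Object* to_space = static_cast<Object*>(arg);
  *to_space = *root;  // "relocate" the object
  return to_space;    // hand the new address back to the caller
}

static void VisitRoots(Object** roots, std::size_t count, RootVisitor visitor, void* arg) {
  for (std::size_t i = 0; i < count; ++i) {
    roots[i] = visitor(roots[i], arg);  // store back: supports moving GCs
  }
}

int main() {
  Object from{42};
  Object to{0};
  Object* roots[] = {&from};
  VisitRoots(roots, 1, MoveRootCallback, &to);
  std::cout << (roots[0] == &to) << " " << roots[0]->payload << "\n";  // prints "1 42"
}
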
 
diff --git a/runtime/gc/collector/sticky_mark_sweep.h b/runtime/gc/collector/sticky_mark_sweep.h
index 79c4359..8bee00f 100644
--- a/runtime/gc/collector/sticky_mark_sweep.h
+++ b/runtime/gc/collector/sticky_mark_sweep.h
@@ -31,6 +31,10 @@
     return kGcTypeSticky;
   }
 
+  // Don't need to do anything special here since we scan all the cards which may have references
+  // to the newly allocated objects.
+  virtual void UpdateAndMarkModUnion() { }
+
   explicit StickyMarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix = "");
   ~StickyMarkSweep() {}
 
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
new file mode 100644
index 0000000..b7ef77c
--- /dev/null
+++ b/runtime/gc/heap-inl.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_HEAP_INL_H_
+#define ART_RUNTIME_GC_HEAP_INL_H_
+
+#include "heap.h"
+
+#include "debugger.h"
+#include "gc/space/dlmalloc_space-inl.h"
+#include "gc/space/large_object_space.h"
+#include "object_utils.h"
+#include "runtime.h"
+#include "thread.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace gc {
+
+inline mirror::Object* Heap::AllocObjectUninstrumented(Thread* self, mirror::Class* c, size_t byte_count) {
+  DebugCheckPreconditionsForAllocObject(c, byte_count);
+  mirror::Object* obj;
+  size_t bytes_allocated;
+  AllocationTimer alloc_timer(this, &obj);
+  bool large_object_allocation = TryAllocLargeObjectUninstrumented(self, c, byte_count,
+                                                                   &obj, &bytes_allocated);
+  if (LIKELY(!large_object_allocation)) {
+    // Non-large object allocation.
+    obj = AllocateUninstrumented(self, alloc_space_, byte_count, &bytes_allocated);
+    // Ensure that we did not allocate into a zygote space.
+    DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace());
+  }
+  if (LIKELY(obj != NULL)) {
+    obj->SetClass(c);
+    // Record allocation after since we want to use the atomic add for the atomic fence to guard
+    // the SetClass since we do not want the class to appear NULL in another thread.
+    size_t new_num_bytes_allocated = RecordAllocationUninstrumented(bytes_allocated, obj);
+    DCHECK(!Dbg::IsAllocTrackingEnabled());
+    CheckConcurrentGC(self, new_num_bytes_allocated, obj);
+    if (kDesiredHeapVerification > kNoHeapVerification) {
+      VerifyObject(obj);
+    }
+    return obj;
+  }
+  ThrowOutOfMemoryError(self, byte_count, large_object_allocation);
+  return NULL;
+}
+
+inline size_t Heap::RecordAllocationUninstrumented(size_t size, mirror::Object* obj) {
+  DCHECK(obj != NULL);
+  DCHECK_GT(size, 0u);
+  size_t old_num_bytes_allocated = static_cast<size_t>(num_bytes_allocated_.fetch_add(size));
+
+  DCHECK(!Runtime::Current()->HasStatsEnabled());
+
+  // This is safe to do since the GC will never free objects which are neither in the allocation
+  // stack or the live bitmap.
+  while (!allocation_stack_->AtomicPushBack(obj)) {
+    CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
+  }
+
+  return old_num_bytes_allocated + size;
+}
+
+inline mirror::Object* Heap::TryToAllocateUninstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
+                                                         bool grow, size_t* bytes_allocated) {
+  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
+    return NULL;
+  }
+  DCHECK(!running_on_valgrind_);
+  return space->Alloc(self, alloc_size, bytes_allocated);
+}
+
+// DlMallocSpace-specific version.
+inline mirror::Object* Heap::TryToAllocateUninstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
+                                                         bool grow, size_t* bytes_allocated) {
+  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
+    return NULL;
+  }
+  DCHECK(!running_on_valgrind_);
+  return space->AllocNonvirtual(self, alloc_size, bytes_allocated);
+}
+
+template <class T>
+inline mirror::Object* Heap::AllocateUninstrumented(Thread* self, T* space, size_t alloc_size,
+                                                    size_t* bytes_allocated) {
+  // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
+  // done in the runnable state where suspension is expected.
+  DCHECK_EQ(self->GetState(), kRunnable);
+  self->AssertThreadSuspensionIsAllowable();
+
+  mirror::Object* ptr = TryToAllocateUninstrumented(self, space, alloc_size, false, bytes_allocated);
+  if (LIKELY(ptr != NULL)) {
+    return ptr;
+  }
+  return AllocateInternalWithGc(self, space, alloc_size, bytes_allocated);
+}
+
+inline bool Heap::TryAllocLargeObjectUninstrumented(Thread* self, mirror::Class* c, size_t byte_count,
+                                                    mirror::Object** obj_ptr, size_t* bytes_allocated) {
+  bool large_object_allocation = ShouldAllocLargeObject(c, byte_count);
+  if (UNLIKELY(large_object_allocation)) {
+    mirror::Object* obj = AllocateUninstrumented(self, large_object_space_, byte_count, bytes_allocated);
+    // Make sure that our large object didn't get placed anywhere within the space interval or else
+    // it breaks the immune range.
+    DCHECK(obj == NULL ||
+           reinterpret_cast<byte*>(obj) < continuous_spaces_.front()->Begin() ||
+           reinterpret_cast<byte*>(obj) >= continuous_spaces_.back()->End());
+    *obj_ptr = obj;
+  }
+  return large_object_allocation;
+}
+
+inline void Heap::DebugCheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count) {
+  DCHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) ||
+         (c->IsVariableSize() || c->GetObjectSize() == byte_count) ||
+         ClassHelper(c).GetDescriptorAsStringPiece().length() == 0);
+  DCHECK_GE(byte_count, sizeof(mirror::Object));
+}
+
+inline Heap::AllocationTimer::AllocationTimer(Heap* heap, mirror::Object** allocated_obj_ptr)
+    : heap_(heap), allocated_obj_ptr_(allocated_obj_ptr) {
+  if (kMeasureAllocationTime) {
+    allocation_start_time_ = NanoTime() / kTimeAdjust;
+  }
+}
+
+inline Heap::AllocationTimer::~AllocationTimer() {
+  if (kMeasureAllocationTime) {
+    mirror::Object* allocated_obj = *allocated_obj_ptr_;
+    // Only record the time if the allocation succeeded.
+    if (allocated_obj != NULL) {
+      uint64_t allocation_end_time = NanoTime() / kTimeAdjust;
+      heap_->total_allocation_time_.fetch_add(allocation_end_time - allocation_start_time_);
+    }
+  }
+}
+
+inline bool Heap::ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) {
+  // We need to have a zygote space, or else our newly allocated large object can end up in the
+  // zygote space, resulting in it being prematurely freed.
+  // We can only do this for primitive arrays, since large objects will not be within the card table
+  // range. This also means that we rely on SetClass not dirtying the object's card.
+  return byte_count >= kLargeObjectThreshold && have_zygote_space_ && c->IsPrimitiveArray();
+}
+
+inline bool Heap::IsOutOfMemoryOnAllocation(size_t alloc_size, bool grow) {
+  size_t new_footprint = num_bytes_allocated_ + alloc_size;
+  if (UNLIKELY(new_footprint > max_allowed_footprint_)) {
+    if (UNLIKELY(new_footprint > growth_limit_)) {
+      return true;
+    }
+    if (!concurrent_gc_) {
+      if (!grow) {
+        return true;
+      } else {
+        max_allowed_footprint_ = new_footprint;
+      }
+    }
+  }
+  return false;
+}
+
+inline void Heap::CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated, mirror::Object* obj) {
+  if (UNLIKELY(new_num_bytes_allocated >= concurrent_start_bytes_)) {
+    // The SirtRef is necessary since the calls in RequestConcurrentGC are a safepoint.
+    SirtRef<mirror::Object> ref(self, obj);
+    RequestConcurrentGC(self);
+  }
+}
+
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_HEAP_INL_H_
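
IsOutOfMemoryOnAllocation above distinguishes a soft footprint (max_allowed_footprint_, grown on a post-GC retry) from a hard growth_limit_ that is never exceeded. A self-contained model of that decision procedure, with made-up sizes and illustrative names:

#include <cstddef>
#include <iostream>

// Standalone model of Heap::IsOutOfMemoryOnAllocation (values are invented,
// not the runtime's actual configuration).
struct FootprintModel {
  std::size_t num_bytes_allocated = 0;
  std::size_t max_allowed_footprint = 16 * 1024 * 1024;  // soft limit, may grow
  std::size_t growth_limit = 64 * 1024 * 1024;           // hard limit, never grows
  bool concurrent_gc = false;

  bool IsOutOfMemory(std::size_t alloc_size, bool grow) {
    std::size_t new_footprint = num_bytes_allocated + alloc_size;
    if (new_footprint > max_allowed_footprint) {
      if (new_footprint > growth_limit) {
        return true;  // Over the hard limit: always an OOM.
      }
      if (!concurrent_gc) {
        if (!grow) {
          return true;  // First attempt: fail so the caller runs a GC.
        }
        max_allowed_footprint = new_footprint;  // Post-GC retry: grow the heap.
      }
    }
    return false;
  }
};

int main() {
  FootprintModel heap;
  heap.num_bytes_allocated = 15 * 1024 * 1024;
  std::cout << "first try OOM: " << heap.IsOutOfMemory(2 * 1024 * 1024, false) << "\n";  // 1
  std::cout << "post-GC OOM:   " << heap.IsOutOfMemory(2 * 1024 * 1024, true) << "\n";   // 0, soft limit grows
}
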
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 1b46257..ed90242 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -39,6 +39,7 @@
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
+#include "heap-inl.h"
 #include "image.h"
 #include "invoke_arg_array_builder.h"
 #include "mirror/art_field-inl.h"
@@ -63,8 +64,6 @@
 static constexpr bool kDumpGcPerformanceOnShutdown = false;
 // Minimum amount of remaining bytes before a concurrent GC is triggered.
 static constexpr size_t kMinConcurrentRemainingBytes = 128 * KB;
-// If true, measure the total allocation time.
-static constexpr bool kMeasureAllocationTime = false;
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, size_t capacity, const std::string& original_image_file_name,
@@ -105,7 +104,6 @@
           :  std::numeric_limits<size_t>::max()),
       total_bytes_freed_ever_(0),
       total_objects_freed_ever_(0),
-      large_object_threshold_(3 * kPageSize),
       num_bytes_allocated_(0),
       native_bytes_allocated_(0),
       gc_memory_overhead_(0),
@@ -191,11 +189,11 @@
   card_table_.reset(accounting::CardTable::Create(heap_begin, heap_capacity));
   CHECK(card_table_.get() != NULL) << "Failed to create card table";
 
-  image_mod_union_table_.reset(new accounting::ModUnionTableToZygoteAllocspace(this));
-  CHECK(image_mod_union_table_.get() != NULL) << "Failed to create image mod-union table";
-
-  zygote_mod_union_table_.reset(new accounting::ModUnionTableCardCache(this));
-  CHECK(zygote_mod_union_table_.get() != NULL) << "Failed to create Zygote mod-union table";
+  accounting::ModUnionTable* mod_union_table =
+      new accounting::ModUnionTableToZygoteAllocspace("Image mod-union table", this,
+                                                      GetImageSpace());
+  CHECK(mod_union_table != nullptr) << "Failed to create image mod-union table";
+  AddModUnionTable(mod_union_table);
 
   // TODO: Count objects in the image space here.
   num_bytes_allocated_ = 0;
@@ -238,6 +236,11 @@
   }
 
   CHECK_NE(max_allowed_footprint_, 0U);
+
+  if (running_on_valgrind_) {
+    Runtime::Current()->InstrumentQuickAllocEntryPoints();
+  }
+
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() exiting";
   }
@@ -489,10 +492,7 @@
   live_stack_->Reset();
 
   VLOG(heap) << "~Heap()";
-  // We can't take the heap lock here because there might be a daemon thread suspended with the
-  // heap lock held. We know though that no non-daemon threads are executing, and we know that
-  // all daemon threads are suspended, and we also know that the threads list have been deleted, so
-  // those threads can't resume. We're the only running thread, and we can do whatever we like...
+  STLDeleteValues(&mod_union_tables_);
   STLDeleteElements(&continuous_spaces_);
   STLDeleteElements(&discontinuous_spaces_);
   delete gc_complete_lock_;
@@ -554,81 +554,69 @@
   }
 }
 
-mirror::Object* Heap::AllocObject(Thread* self, mirror::Class* c, size_t byte_count) {
-  DCHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) ||
-         (c->IsVariableSize() || c->GetObjectSize() == byte_count) ||
-         strlen(ClassHelper(c).GetDescriptor()) == 0);
-  DCHECK_GE(byte_count, sizeof(mirror::Object));
-
-  mirror::Object* obj = NULL;
-  size_t bytes_allocated = 0;
-  uint64_t allocation_start = 0;
-  if (UNLIKELY(kMeasureAllocationTime)) {
-    allocation_start = NanoTime() / kTimeAdjust;
+void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation) {
+  std::ostringstream oss;
+  int64_t total_bytes_free = GetFreeMemory();
+  oss << "Failed to allocate a " << byte_count << " byte allocation with " << total_bytes_free
+      << " free bytes";
+  // If the allocation failed due to fragmentation, print out the largest contiguous allocation.
+  if (!large_object_allocation && total_bytes_free >= byte_count) {
+    size_t max_contiguous_allocation = 0;
+    for (const auto& space : continuous_spaces_) {
+      if (space->IsDlMallocSpace()) {
+        space->AsDlMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
+      }
+    }
+    oss << "; failed due to fragmentation (largest possible contiguous allocation "
+        <<  max_contiguous_allocation << " bytes)";
   }
+  self->ThrowOutOfMemoryError(oss.str().c_str());
+}
 
-  // We need to have a zygote space or else our newly allocated large object can end up in the
-  // Zygote resulting in it being prematurely freed.
-  // We can only do this for primitive objects since large objects will not be within the card table
-  // range. This also means that we rely on SetClass not dirtying the object's card.
-  bool large_object_allocation =
-      byte_count >= large_object_threshold_ && have_zygote_space_ && c->IsPrimitiveArray();
+inline bool Heap::TryAllocLargeObjectInstrumented(Thread* self, mirror::Class* c, size_t byte_count,
+                                                  mirror::Object** obj_ptr, size_t* bytes_allocated) {
+  bool large_object_allocation = ShouldAllocLargeObject(c, byte_count);
   if (UNLIKELY(large_object_allocation)) {
-    obj = Allocate(self, large_object_space_, byte_count, &bytes_allocated);
+    mirror::Object* obj = AllocateInstrumented(self, large_object_space_, byte_count, bytes_allocated);
     // Make sure that our large object didn't get placed anywhere within the space interval or else
     // it breaks the immune range.
     DCHECK(obj == NULL ||
            reinterpret_cast<byte*>(obj) < continuous_spaces_.front()->Begin() ||
            reinterpret_cast<byte*>(obj) >= continuous_spaces_.back()->End());
-  } else {
-    obj = Allocate(self, alloc_space_, byte_count, &bytes_allocated);
+    *obj_ptr = obj;
+  }
+  return large_object_allocation;
+}
+
+mirror::Object* Heap::AllocObjectInstrumented(Thread* self, mirror::Class* c, size_t byte_count) {
+  DebugCheckPreconditionsForAllocObject(c, byte_count);
+  mirror::Object* obj;
+  size_t bytes_allocated;
+  AllocationTimer alloc_timer(this, &obj);
+  bool large_object_allocation = TryAllocLargeObjectInstrumented(self, c, byte_count,
+                                                                 &obj, &bytes_allocated);
+  if (LIKELY(!large_object_allocation)) {
+    // Non-large object allocation.
+    obj = AllocateInstrumented(self, alloc_space_, byte_count, &bytes_allocated);
     // Ensure that we did not allocate into a zygote space.
     DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace());
   }
-
   if (LIKELY(obj != NULL)) {
     obj->SetClass(c);
-
     // Record allocation after since we want to use the atomic add for the atomic fence to guard
     // the SetClass since we do not want the class to appear NULL in another thread.
-    RecordAllocation(bytes_allocated, obj);
-
+    size_t new_num_bytes_allocated = RecordAllocationInstrumented(bytes_allocated, obj);
     if (Dbg::IsAllocTrackingEnabled()) {
       Dbg::RecordAllocation(c, byte_count);
     }
-    if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_) >= concurrent_start_bytes_)) {
-      // The SirtRef is necessary since the calls in RequestConcurrentGC are a safepoint.
-      SirtRef<mirror::Object> ref(self, obj);
-      RequestConcurrentGC(self);
-    }
+    CheckConcurrentGC(self, new_num_bytes_allocated, obj);
     if (kDesiredHeapVerification > kNoHeapVerification) {
       VerifyObject(obj);
     }
-
-    if (UNLIKELY(kMeasureAllocationTime)) {
-      total_allocation_time_.fetch_add(NanoTime() / kTimeAdjust - allocation_start);
-    }
-
     return obj;
-  } else {
-    std::ostringstream oss;
-    int64_t total_bytes_free = GetFreeMemory();
-    oss << "Failed to allocate a " << byte_count << " byte allocation with " << total_bytes_free
-        << " free bytes";
-    // If the allocation failed due to fragmentation, print out the largest continuous allocation.
-    if (!large_object_allocation && total_bytes_free >= byte_count) {
-      size_t max_contiguous_allocation = 0;
-      for (const auto& space : continuous_spaces_) {
-        if (space->IsDlMallocSpace()) {
-          space->AsDlMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
-        }
-      }
-      oss << "; failed due to fragmentation (largest possible contiguous allocation "
-          <<  max_contiguous_allocation << " bytes)";
-    }
-    self->ThrowOutOfMemoryError(oss.str().c_str());
-    return NULL;
   }
+  ThrowOutOfMemoryError(self, byte_count, large_object_allocation);
+  return NULL;
 }
 
 bool Heap::IsHeapAddress(const mirror::Object* obj) {
@@ -771,10 +759,10 @@
   GetLiveBitmap()->Walk(Heap::VerificationCallback, this);
 }
 
-inline void Heap::RecordAllocation(size_t size, mirror::Object* obj) {
+inline size_t Heap::RecordAllocationInstrumented(size_t size, mirror::Object* obj) {
   DCHECK(obj != NULL);
   DCHECK_GT(size, 0u);
-  num_bytes_allocated_.fetch_add(size);
+  size_t old_num_bytes_allocated = static_cast<size_t>(num_bytes_allocated_.fetch_add(size));
 
   if (Runtime::Current()->HasStatsEnabled()) {
     RuntimeStats* thread_stats = Thread::Current()->GetStats();
@@ -792,6 +780,8 @@
   while (!allocation_stack_->AtomicPushBack(obj)) {
     CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
   }
+
+  return old_num_bytes_allocated + size;
 }
 
 void Heap::RecordFree(size_t freed_objects, size_t freed_bytes) {
@@ -810,25 +800,8 @@
   }
 }
 
-inline bool Heap::IsOutOfMemoryOnAllocation(size_t alloc_size, bool grow) {
-  size_t new_footprint = num_bytes_allocated_ + alloc_size;
-  if (UNLIKELY(new_footprint > max_allowed_footprint_)) {
-    if (UNLIKELY(new_footprint > growth_limit_)) {
-      return true;
-    }
-    if (!concurrent_gc_) {
-      if (!grow) {
-        return true;
-      } else {
-        max_allowed_footprint_ = new_footprint;
-      }
-    }
-  }
-  return false;
-}
-
-inline mirror::Object* Heap::TryToAllocate(Thread* self, space::AllocSpace* space, size_t alloc_size,
-                                           bool grow, size_t* bytes_allocated) {
+inline mirror::Object* Heap::TryToAllocateInstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
+                                                       bool grow, size_t* bytes_allocated) {
   if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
     return NULL;
   }
@@ -836,8 +809,8 @@
 }
 
 // DlMallocSpace-specific version.
-inline mirror::Object* Heap::TryToAllocate(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
-                                           bool grow, size_t* bytes_allocated) {
+inline mirror::Object* Heap::TryToAllocateInstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
+                                                       bool grow, size_t* bytes_allocated) {
   if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
     return NULL;
   }
@@ -849,15 +822,15 @@
 }
 
 template <class T>
-inline mirror::Object* Heap::Allocate(Thread* self, T* space, size_t alloc_size,
-                                      size_t* bytes_allocated) {
+inline mirror::Object* Heap::AllocateInstrumented(Thread* self, T* space, size_t alloc_size,
+                                                  size_t* bytes_allocated) {
   // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
   // done in the runnable state where suspension is expected.
   DCHECK_EQ(self->GetState(), kRunnable);
   self->AssertThreadSuspensionIsAllowable();
 
-  mirror::Object* ptr = TryToAllocate(self, space, alloc_size, false, bytes_allocated);
-  if (ptr != NULL) {
+  mirror::Object* ptr = TryToAllocateInstrumented(self, space, alloc_size, false, bytes_allocated);
+  if (LIKELY(ptr != NULL)) {
     return ptr;
   }
   return AllocateInternalWithGc(self, space, alloc_size, bytes_allocated);
@@ -872,7 +845,7 @@
   collector::GcType last_gc = WaitForConcurrentGcToComplete(self);
   if (last_gc != collector::kGcTypeNone) {
     // A GC was in progress and we blocked, retry allocation now that memory has been freed.
-    ptr = TryToAllocate(self, space, alloc_size, false, bytes_allocated);
+    ptr = TryToAllocateInstrumented(self, space, alloc_size, false, bytes_allocated);
     if (ptr != NULL) {
       return ptr;
     }
@@ -907,7 +880,7 @@
       i = static_cast<size_t>(gc_type_ran);
 
       // Did we free sufficient memory for the allocation to succeed?
-      ptr = TryToAllocate(self, space, alloc_size, false, bytes_allocated);
+      ptr = TryToAllocateInstrumented(self, space, alloc_size, false, bytes_allocated);
       if (ptr != NULL) {
         return ptr;
       }
@@ -916,7 +889,7 @@
 
   // Allocations have failed after GCs;  this is an exceptional state.
   // Try harder, growing the heap if necessary.
-  ptr = TryToAllocate(self, space, alloc_size, true, bytes_allocated);
+  ptr = TryToAllocateInstrumented(self, space, alloc_size, true, bytes_allocated);
   if (ptr != NULL) {
     return ptr;
   }
@@ -931,7 +904,7 @@
 
   // We don't need a WaitForConcurrentGcToComplete here either.
   CollectGarbageInternal(collector::kGcTypeFull, kGcCauseForAlloc, true);
-  return TryToAllocate(self, space, alloc_size, true, bytes_allocated);
+  return TryToAllocateInstrumented(self, space, alloc_size, true, bytes_allocated);
 }
 
 void Heap::SetTargetHeapUtilization(float target) {
@@ -1084,15 +1057,15 @@
   // For bitmap Visit.
   // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for
   // annotalysis on visitors.
-  void operator()(const mirror::Object* o) const NO_THREAD_SAFETY_ANALYSIS {
-    collector::MarkSweep::VisitObjectReferences(o, *this);
+  void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+    collector::MarkSweep::VisitObjectReferences(obj, *this, true);
   }
 
   // For MarkSweep::VisitObjectReferences.
-  void operator()(const mirror::Object* referrer, const mirror::Object* object,
+  void operator()(mirror::Object* referrer, mirror::Object* object,
                   const MemberOffset&, bool) const {
     if (object == object_ && (max_count_ == 0 || referring_objects_.size() < max_count_)) {
-      referring_objects_.push_back(const_cast<mirror::Object*>(referrer));
+      referring_objects_.push_back(referrer);
     }
   }
 
@@ -1157,6 +1130,12 @@
   AddContinuousSpace(alloc_space_);
   have_zygote_space_ = true;
 
+  // Create the zygote space mod union table.
+  accounting::ModUnionTable* mod_union_table =
+      new accounting::ModUnionTableCardCache("zygote space mod-union table", this, zygote_space);
+  CHECK(mod_union_table != nullptr) << "Failed to create zygote space mod-union table";
+  AddModUnionTable(mod_union_table);
+
   // Reset the cumulative loggers since we now have a few additional timing phases.
   for (const auto& collector : mark_sweep_collectors_) {
     collector->ResetCumulativeStatistics();
@@ -1313,38 +1292,12 @@
   return gc_type;
 }
 
-void Heap::UpdateAndMarkModUnion(collector::MarkSweep* mark_sweep, base::TimingLogger& timings,
-                                 collector::GcType gc_type) {
-  if (gc_type == collector::kGcTypeSticky) {
-    // Don't need to do anything for mod union table in this case since we are only scanning dirty
-    // cards.
-    return;
-  }
-
-  base::TimingLogger::ScopedSplit split("UpdateModUnionTable", &timings);
-  // Update zygote mod union table.
-  if (gc_type == collector::kGcTypePartial) {
-    base::TimingLogger::ScopedSplit split("UpdateZygoteModUnionTable", &timings);
-    zygote_mod_union_table_->Update();
-
-    timings.NewSplit("ZygoteMarkReferences");
-    zygote_mod_union_table_->MarkReferences(mark_sweep);
-  }
-
-  // Processes the cards we cleared earlier and adds their objects into the mod-union table.
-  timings.NewSplit("UpdateModUnionTable");
-  image_mod_union_table_->Update();
-
-  // Scans all objects in the mod-union table.
-  timings.NewSplit("MarkImageToAllocSpaceReferences");
-  image_mod_union_table_->MarkReferences(mark_sweep);
-}
-
-static void RootMatchesObjectVisitor(const mirror::Object* root, void* arg) {
+static mirror::Object* RootMatchesObjectVisitor(mirror::Object* root, void* arg) {
   mirror::Object* obj = reinterpret_cast<mirror::Object*>(arg);
   if (root == obj) {
     LOG(INFO) << "Object " << obj << " is a root";
   }
+  return root;
 }
 
 class ScanVisitor {
@@ -1459,9 +1412,10 @@
     return heap_->IsLiveObjectLocked(obj, true, false, true);
   }
 
-  static void VerifyRoots(const mirror::Object* root, void* arg) {
+  static mirror::Object* VerifyRoots(mirror::Object* root, void* arg) {
     VerifyReferenceVisitor* visitor = reinterpret_cast<VerifyReferenceVisitor*>(arg);
-    (*visitor)(NULL, root, MemberOffset(0), true);
+    (*visitor)(nullptr, root, MemberOffset(0), true);
+    return root;
   }
 
  private:
@@ -1481,7 +1435,7 @@
     VerifyReferenceVisitor visitor(heap_);
     // The class doesn't count as a reference but we should verify it anyways.
     visitor(obj, obj->GetClass(), MemberOffset(0), false);
-    collector::MarkSweep::VisitObjectReferences(obj, visitor);
+    collector::MarkSweep::VisitObjectReferences(const_cast<mirror::Object*>(obj), visitor, true);
     failed_ = failed_ || visitor.Failed();
   }
 
@@ -1514,8 +1468,10 @@
   // pointing to dead objects if they are not reachable.
   if (visitor.Failed()) {
     // Dump mod-union tables.
-    image_mod_union_table_->Dump(LOG(ERROR) << "Image mod-union table: ");
-    zygote_mod_union_table_->Dump(LOG(ERROR) << "Zygote mod-union table: ");
+    for (const auto& table_pair : mod_union_tables_) {
+      accounting::ModUnionTable* mod_union_table = table_pair.second;
+      mod_union_table->Dump(LOG(ERROR) << mod_union_table->GetName() << ": ");
+    }
     DumpSpaces();
     return false;
   }
@@ -1599,10 +1555,10 @@
       : heap_(heap),
         failed_(false) {}
 
-  void operator()(const mirror::Object* obj) const
+  void operator()(mirror::Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     VerifyReferenceCardVisitor visitor(heap_, const_cast<bool*>(&failed_));
-    collector::MarkSweep::VisitObjectReferences(obj, visitor);
+    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
   }
 
   bool Failed() const {
@@ -1638,15 +1594,23 @@
   allocation_stack_.swap(live_stack_);
 }
 
+accounting::ModUnionTable* Heap::FindModUnionTableFromSpace(space::Space* space) {
+  auto it = mod_union_tables_.find(space);
+  if (it == mod_union_tables_.end()) {
+    return nullptr;
+  }
+  return it->second;
+}
+
 void Heap::ProcessCards(base::TimingLogger& timings) {
   // Clear cards and keep track of cards cleared in the mod-union table.
   for (const auto& space : continuous_spaces_) {
-    if (space->IsImageSpace()) {
-      base::TimingLogger::ScopedSplit split("ImageModUnionClearCards", &timings);
-      image_mod_union_table_->ClearCards(space);
-    } else if (space->IsZygoteSpace()) {
-      base::TimingLogger::ScopedSplit split("ZygoteModUnionClearCards", &timings);
-      zygote_mod_union_table_->ClearCards(space);
+    accounting::ModUnionTable* table = FindModUnionTableFromSpace(space);
+    if (table != nullptr) {
+      const char* name = space->IsZygoteSpace() ? "ZygoteModUnionClearCards" :
+          "ImageModUnionClearCards";
+      base::TimingLogger::ScopedSplit split(name, &timings);
+      table->ClearCards();
     } else {
       base::TimingLogger::ScopedSplit split("AllocSpaceClearCards", &timings);
       // No mod union table for the AllocSpace. Age the cards so that the GC knows that these cards
@@ -1656,6 +1620,10 @@
   }
 }
 
+static mirror::Object* IdentityCallback(mirror::Object* obj, void*) {
+  return obj;
+}
+
 void Heap::PreGcVerification(collector::GarbageCollector* gc) {
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
   Thread* self = Thread::Current();
@@ -1689,10 +1657,11 @@
   if (verify_mod_union_table_) {
     thread_list->SuspendAll();
     ReaderMutexLock reader_lock(self, *Locks::heap_bitmap_lock_);
-    zygote_mod_union_table_->Update();
-    zygote_mod_union_table_->Verify();
-    image_mod_union_table_->Update();
-    image_mod_union_table_->Verify();
+    for (const auto& table_pair : mod_union_tables_) {
+      accounting::ModUnionTable* mod_union_table = table_pair.second;
+      mod_union_table->UpdateAndMarkReferences(IdentityCallback, nullptr);
+      mod_union_table->Verify();
+    }
     thread_list->ResumeAll();
   }
 }
@@ -2030,8 +1999,10 @@
   // We could try mincore(2) but that's only a measure of how many pages we haven't given away,
   // not how much use we're making of those pages.
   uint64_t ms_time = MilliTime();
-  float utilization =
-      static_cast<float>(alloc_space_->GetBytesAllocated()) / alloc_space_->Size();
+  // Note the large object space's bytes allocated is equal to its capacity.
+  uint64_t los_bytes_allocated = large_object_space_->GetBytesAllocated();
+  float utilization = static_cast<float>(GetBytesAllocated() - los_bytes_allocated) /
+      (GetTotalMemory() - los_bytes_allocated);
   if ((utilization > 0.75f && !IsLowMemoryMode()) || ((ms_time - last_trim_time_ms_) < 2 * 1000)) {
     // Don't bother trimming the alloc space if it's more than 75% utilized and low memory mode is
     // not enabled, or if a heap trim occurred in the last two seconds.
@@ -2146,5 +2117,10 @@
   return ret;
 }
 
+void Heap::AddModUnionTable(accounting::ModUnionTable* mod_union_table) {
+  DCHECK(mod_union_table != nullptr);
+  mod_union_tables_.Put(mod_union_table->GetSpace(), mod_union_table);
+}
+
 }  // namespace gc
 }  // namespace art
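
The heap now keeps a single registry of mod-union tables keyed by space: image and zygote spaces register a table whose cards are cleared into it, while spaces without a table just have their cards aged. A simplified sketch of the lookup-or-age pattern in ProcessCards, using std::map in place of the runtime's SafeMap and placeholder types:

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Placeholder stand-ins for space::Space and accounting::ModUnionTable.
struct Space { std::string name; };
struct ModUnionTable {
  std::string name;
  void ClearCards() { std::cout << name << ": cards cleared into table\n"; }
};

class HeapModel {
 public:
  void AddModUnionTable(Space* space, ModUnionTable* table) {
    mod_union_tables_[space] = table;  // at most one table per space
  }

  ModUnionTable* FindModUnionTableFromSpace(Space* space) {
    auto it = mod_union_tables_.find(space);
    return it == mod_union_tables_.end() ? nullptr : it->second;
  }

  void ProcessCards(const std::vector<Space*>& spaces) {
    for (Space* space : spaces) {
      if (ModUnionTable* table = FindModUnionTableFromSpace(space)) {
        table->ClearCards();  // references remembered in the mod-union table
      } else {
        std::cout << space->name << ": no mod-union table, cards aged\n";
      }
    }
  }

 private:
  std::map<Space*, ModUnionTable*> mod_union_tables_;
};

int main() {
  Space image{"image"}, zygote{"zygote"}, alloc{"alloc"};
  ModUnionTable image_table{"image mod-union table"};
  ModUnionTable zygote_table{"zygote mod-union table"};
  HeapModel heap;
  heap.AddModUnionTable(&image, &image_table);
  heap.AddModUnionTable(&zygote, &zygote_table);
  heap.ProcessCards({&image, &zygote, &alloc});
}
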
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 0b64261..ffd3034 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -101,6 +101,11 @@
 };
 static constexpr HeapVerificationMode kDesiredHeapVerification = kNoHeapVerification;
 
+// If true, measure the total allocation time.
+static constexpr bool kMeasureAllocationTime = false;
+// Primitive arrays larger than this size are put in the large object space.
+static constexpr size_t kLargeObjectThreshold = 3 * kPageSize;
+
 class Heap {
  public:
   static constexpr size_t kDefaultInitialSize = 2 * MB;
@@ -129,7 +134,17 @@
 
   // Allocates and initializes storage for an object instance.
   mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return AllocObjectInstrumented(self, klass, num_bytes);
+  }
+  mirror::Object* AllocObjectInstrumented(Thread* self, mirror::Class* klass, size_t num_bytes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Object* AllocObjectUninstrumented(Thread* self, mirror::Class* klass, size_t num_bytes)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void DebugCheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ThrowOutOfMemoryError(size_t byte_count, bool large_object_allocation);
 
   void RegisterNativeAllocation(int bytes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -368,11 +383,6 @@
                       accounting::ObjectStack* stack)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  // Update and mark mod union table based on gc type.
-  void UpdateAndMarkModUnion(collector::MarkSweep* mark_sweep, base::TimingLogger& timings,
-                             collector::GcType gc_type)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   // Gets called when we get notified by ActivityThread that the process state has changed.
   void ListenForProcessStateChange();
 
@@ -426,11 +436,28 @@
   size_t GetConcGCThreadCount() const {
     return conc_gc_threads_;
   }
+  accounting::ModUnionTable* FindModUnionTableFromSpace(space::Space* space);
+  void AddModUnionTable(accounting::ModUnionTable* mod_union_table);
 
  private:
+  bool TryAllocLargeObjectInstrumented(Thread* self, mirror::Class* c, size_t byte_count,
+                                       mirror::Object** obj_ptr, size_t* bytes_allocated)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool TryAllocLargeObjectUninstrumented(Thread* self, mirror::Class* c, size_t byte_count,
+                                         mirror::Object** obj_ptr, size_t* bytes_allocated)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count);
+  void CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated, mirror::Object* obj);
+
   // Allocates uninitialized storage. Passing in a null space tries to place the object in the
   // large object space.
-  template <class T> mirror::Object* Allocate(Thread* self, T* space, size_t num_bytes, size_t* bytes_allocated)
+  template <class T> mirror::Object* AllocateInstrumented(Thread* self, T* space, size_t num_bytes,
+                                                          size_t* bytes_allocated)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <class T> mirror::Object* AllocateUninstrumented(Thread* self, T* space, size_t num_bytes,
+                                                            size_t* bytes_allocated)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -442,17 +469,29 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Try to allocate a number of bytes, this function never does any GCs.
-  mirror::Object* TryToAllocate(Thread* self, space::AllocSpace* space, size_t alloc_size, bool grow,
-                                size_t* bytes_allocated)
+  mirror::Object* TryToAllocateInstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
+                                            bool grow, size_t* bytes_allocated)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Try to allocate a number of bytes, this function never does any GCs. DlMallocSpace-specialized version.
-  mirror::Object* TryToAllocate(Thread* self, space::DlMallocSpace* space, size_t alloc_size, bool grow,
-                                size_t* bytes_allocated)
+  mirror::Object* TryToAllocateInstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
+                                            bool grow, size_t* bytes_allocated)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  mirror::Object* TryToAllocateUninstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
+                                              bool grow, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  mirror::Object* TryToAllocateUninstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
+                                              bool grow, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsOutOfMemoryOnAllocation(size_t alloc_size, bool grow);
 
   // Pushes a list of cleared references out to the managed heap.
@@ -462,7 +501,11 @@
   void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
   bool IsGCRequestPending() const;
 
-  void RecordAllocation(size_t size, mirror::Object* object)
+  size_t RecordAllocationInstrumented(size_t size, mirror::Object* object)
+      LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  size_t RecordAllocationUninstrumented(size_t size, mirror::Object* object)
       LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -522,12 +565,8 @@
   // The card table, dirtied by the write barrier.
   UniquePtr<accounting::CardTable> card_table_;
 
-  // The mod-union table remembers all of the references from the image space to the alloc /
-  // zygote spaces to allow the card table to be cleared.
-  UniquePtr<accounting::ModUnionTable> image_mod_union_table_;
-
-  // This table holds all of the references from the zygote space to the alloc space.
-  UniquePtr<accounting::ModUnionTable> zygote_mod_union_table_;
+  // A mod-union table remembers all of the references from its space to other spaces.
+  SafeMap<space::Space*, accounting::ModUnionTable*> mod_union_tables_;
 
   // What kind of concurrency behavior is the runtime after? True for concurrent mark sweep GC,
   // false for stop-the-world mark sweep.
@@ -615,9 +654,6 @@
   // Since the heap was created, how many objects have been freed.
   size_t total_objects_freed_ever_;
 
-  // Primitive objects larger than this size are put in the large object space.
-  const size_t large_object_threshold_;
-
   // Number of bytes allocated.  Adjusted after each allocation and free.
   AtomicInteger num_bytes_allocated_;
 
@@ -719,6 +755,16 @@
   friend class ScopedHeapLock;
   friend class space::SpaceTest;
 
+  class AllocationTimer {
+   private:
+    Heap* heap_;
+    mirror::Object** allocated_obj_ptr_;
+    uint64_t allocation_start_time_;
+   public:
+    AllocationTimer(Heap* heap, mirror::Object** allocated_obj_ptr);
+    ~AllocationTimer();
+  };
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(Heap);
 };
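
AllocationTimer is an RAII helper: its constructor samples the clock (when timing is compiled in) and its destructor charges the elapsed time to the heap only when the observed allocation actually produced an object. A standalone sketch of the same pattern using std::chrono; all names here are illustrative:

#include <chrono>
#include <cstdint>
#include <iostream>

// RAII timer that records elapsed time only for successful allocations,
// mirroring the Heap::AllocationTimer pattern (simplified sketch).
class ScopedAllocTimer {
 public:
  ScopedAllocTimer(uint64_t* total_ns, void** allocated_obj_ptr)
      : total_ns_(total_ns),
        allocated_obj_ptr_(allocated_obj_ptr),
        start_(std::chrono::steady_clock::now()) {}

  ~ScopedAllocTimer() {
    if (*allocated_obj_ptr_ != nullptr) {  // only charge successful allocations
      auto end = std::chrono::steady_clock::now();
      *total_ns_ +=
          std::chrono::duration_cast<std::chrono::nanoseconds>(end - start_).count();
    }
  }

 private:
  uint64_t* total_ns_;
  void** allocated_obj_ptr_;
  std::chrono::steady_clock::time_point start_;
};

int main() {
  uint64_t total_allocation_time_ns = 0;
  void* obj = nullptr;
  {
    ScopedAllocTimer timer(&total_allocation_time_ns, &obj);
    obj = ::operator new(64);  // stands in for the heap's allocation path
  }  // destructor fires here and records the elapsed time
  ::operator delete(obj);
  std::cout << "recorded " << total_allocation_time_ns << " ns\n";
}
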
 
diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h
index 5481141..fb2c66b 100644
--- a/runtime/gc/space/dlmalloc_space-inl.h
+++ b/runtime/gc/space/dlmalloc_space-inl.h
@@ -30,7 +30,7 @@
     MutexLock mu(self, lock_);
     obj = AllocWithoutGrowthLocked(num_bytes, bytes_allocated);
   }
-  if (obj != NULL) {
+  if (LIKELY(obj != NULL)) {
     // Zero freshly allocated memory, done while not holding the space's lock.
     memset(obj, 0, num_bytes);
   }
@@ -39,7 +39,7 @@
 
 inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated) {
   mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_, num_bytes));
-  if (result != NULL) {
+  if (LIKELY(result != NULL)) {
     if (kDebugSpaces) {
       CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
             << ") not in bounds of allocation space " << *this;
@@ -47,10 +47,6 @@
     size_t allocation_size = AllocationSizeNonvirtual(result);
     DCHECK(bytes_allocated != NULL);
     *bytes_allocated = allocation_size;
-    num_bytes_allocated_ += allocation_size;
-    total_bytes_allocated_ += allocation_size;
-    ++total_objects_allocated_;
-    ++num_objects_allocated_;
   }
   return result;
 }
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index a9440d3..468d1d2 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -23,7 +23,7 @@
 #include "utils.h"
 
 #include <valgrind.h>
-#include <../memcheck/memcheck.h>
+#include <memcheck/memcheck.h>
 
 namespace art {
 namespace gc {
@@ -119,8 +119,7 @@
 DlMallocSpace::DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin,
                        byte* end, size_t growth_limit)
     : MemMapSpace(name, mem_map, end - begin, kGcRetentionPolicyAlwaysCollect),
-      recent_free_pos_(0), num_bytes_allocated_(0), num_objects_allocated_(0),
-      total_bytes_allocated_(0), total_objects_allocated_(0),
+      recent_free_pos_(0), total_bytes_freed_(0), total_objects_freed_(0),
       lock_("allocation space lock", kAllocSpaceLock), mspace_(mspace),
       growth_limit_(growth_limit) {
   CHECK(mspace != NULL);
@@ -353,8 +352,8 @@
     CHECK(Contains(ptr)) << "Free (" << ptr << ") not in bounds of heap " << *this;
   }
   const size_t bytes_freed = InternalAllocationSize(ptr);
-  num_bytes_allocated_ -= bytes_freed;
-  --num_objects_allocated_;
+  total_bytes_freed_ += bytes_freed;
+  ++total_objects_freed_;
   if (kRecentFreeCount > 0) {
     RegisterRecentFree(ptr);
   }
@@ -400,8 +399,8 @@
 
   {
     MutexLock mu(self, lock_);
-    num_bytes_allocated_ -= bytes_freed;
-    num_objects_allocated_ -= num_ptrs;
+    total_bytes_freed_ += bytes_freed;
+    total_objects_freed_ += num_ptrs;
     mspace_bulk_free(mspace_, reinterpret_cast<void**>(ptrs), num_ptrs);
     return bytes_freed;
   }
@@ -501,6 +500,20 @@
       << ",name=\"" << GetName() << "\"]";
 }
 
+uint64_t DlMallocSpace::GetBytesAllocated() {
+  MutexLock mu(Thread::Current(), lock_);
+  size_t bytes_allocated = 0;
+  mspace_inspect_all(mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
+  return bytes_allocated;
+}
+
+uint64_t DlMallocSpace::GetObjectsAllocated() {
+  MutexLock mu(Thread::Current(), lock_);
+  size_t objects_allocated = 0;
+  mspace_inspect_all(mspace_, DlmallocObjectsAllocatedCallback, &objects_allocated);
+  return objects_allocated;
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index b08da01..522535e 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -127,20 +127,13 @@
   // Turn ourself into a zygote space and return a new alloc space which has our unused memory.
   DlMallocSpace* CreateZygoteSpace(const char* alloc_space_name);
 
-  uint64_t GetBytesAllocated() const {
-    return num_bytes_allocated_;
+  uint64_t GetBytesAllocated();
+  uint64_t GetObjectsAllocated();
+  uint64_t GetTotalBytesAllocated() {
+    return GetBytesAllocated() + total_bytes_freed_;
   }
-
-  uint64_t GetObjectsAllocated() const {
-    return num_objects_allocated_;
-  }
-
-  uint64_t GetTotalBytesAllocated() const {
-    return total_bytes_allocated_;
-  }
-
-  uint64_t GetTotalObjectsAllocated() const {
-    return total_objects_allocated_;
+  uint64_t GetTotalObjectsAllocated() {
+    return GetObjectsAllocated() + total_objects_freed_;
   }
 
   // Returns the class of a recently freed object.
@@ -168,11 +161,9 @@
   std::pair<const mirror::Object*, mirror::Class*> recent_freed_objects_[kRecentFreeCount];
   size_t recent_free_pos_;
 
-  // Approximate number of bytes which have been allocated into the space.
-  size_t num_bytes_allocated_;
-  size_t num_objects_allocated_;
-  size_t total_bytes_allocated_;
-  size_t total_objects_allocated_;
+  // Approximate number of bytes and objects which have been deallocated in the space.
+  size_t total_bytes_freed_;
+  size_t total_objects_freed_;
 
   static size_t bitmap_index_;
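
The accounting flip in this file relies on a simple invariant: every byte ever allocated is either still live in the mspace or has been freed, so the lifetime total can be derived as GetBytesAllocated() + total_bytes_freed_ instead of being maintained on every allocation. A toy model of keeping only the freed-side counters:

#include <cstdint>
#include <iostream>

// Toy model of the dlmalloc space accounting change: only freed totals are
// tracked directly; live_bytes stands in for what mspace_inspect_all reports.
struct SpaceModel {
  uint64_t live_bytes = 0;
  uint64_t total_bytes_freed = 0;

  void Alloc(uint64_t n) { live_bytes += n; }
  void Free(uint64_t n)  { live_bytes -= n; total_bytes_freed += n; }

  uint64_t GetBytesAllocated() const { return live_bytes; }
  uint64_t GetTotalBytesAllocated() const {
    // Everything ever allocated is either still live or has been freed.
    return GetBytesAllocated() + total_bytes_freed;
  }
};

int main() {
  SpaceModel space;
  space.Alloc(100);
  space.Alloc(50);
  space.Free(30);
  std::cout << "live:  " << space.GetBytesAllocated() << "\n";       // 120
  std::cout << "total: " << space.GetTotalBytesAllocated() << "\n";  // 150
}
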
 
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index a174c0a..c6d028e 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -17,6 +17,7 @@
 #include "large_object_space.h"
 
 #include "base/logging.h"
+#include "base/mutex-inl.h"
 #include "base/stl_util.h"
 #include "UniquePtr.h"
 #include "image.h"
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index a703e86..3f2e848 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -41,19 +41,19 @@
   virtual void Walk(DlMallocSpace::WalkCallback, void* arg) = 0;
   virtual ~LargeObjectSpace() {}
 
-  uint64_t GetBytesAllocated() const {
+  uint64_t GetBytesAllocated() {
     return num_bytes_allocated_;
   }
 
-  uint64_t GetObjectsAllocated() const {
+  uint64_t GetObjectsAllocated() {
     return num_objects_allocated_;
   }
 
-  uint64_t GetTotalBytesAllocated() const {
+  uint64_t GetTotalBytesAllocated() {
     return total_bytes_allocated_;
   }
 
-  uint64_t GetTotalObjectsAllocated() const {
+  uint64_t GetTotalObjectsAllocated() {
     return total_objects_allocated_;
   }
 
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 68df563..6dd7952 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -146,13 +146,13 @@
 class AllocSpace {
  public:
   // Number of bytes currently allocated.
-  virtual uint64_t GetBytesAllocated() const = 0;
+  virtual uint64_t GetBytesAllocated() = 0;
   // Number of objects currently allocated.
-  virtual uint64_t GetObjectsAllocated() const = 0;
+  virtual uint64_t GetObjectsAllocated() = 0;
   // Number of bytes allocated since the space was created.
-  virtual uint64_t GetTotalBytesAllocated() const = 0;
+  virtual uint64_t GetTotalBytesAllocated() = 0;
   // Number of objects allocated since the space was created.
-  virtual uint64_t GetTotalObjectsAllocated() const = 0;
+  virtual uint64_t GetTotalObjectsAllocated() = 0;
 
   // Allocate num_bytes without allowing growth. If the allocation
   // succeeds, the output parameter bytes_allocated will be set to the
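
The const qualifiers come off these accessors, plausibly because the DlMallocSpace overrides now take the space lock and walk the mspace, and acquiring a non-mutable mutex is not possible from a const member function. A tiny illustration of that constraint (an inference from the patch, not something it states):

#include <cstdint>
#include <mutex>

class Space {
 public:
  virtual ~Space() {}
  virtual uint64_t GetBytesAllocated() = 0;  // non-const: may lock and walk
};

class CountingSpace : public Space {
 public:
  uint64_t GetBytesAllocated() override {
    std::lock_guard<std::mutex> guard(lock_);  // ill-formed if this method were const
    return bytes_;
  }
  void Record(uint64_t n) {
    std::lock_guard<std::mutex> guard(lock_);
    bytes_ += n;
  }

 private:
  std::mutex lock_;  // not mutable, so const methods could not lock it
  uint64_t bytes_ = 0;
};

int main() {
  CountingSpace space;
  space.Record(128);
  return space.GetBytesAllocated() == 128 ? 0 : 1;
}
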
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 0b2e741..67620a0 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -484,11 +484,11 @@
   }
 
  private:
-  static void RootVisitor(const mirror::Object* obj, void* arg)
+  static mirror::Object* RootVisitor(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(arg != NULL);
-    Hprof* hprof = reinterpret_cast<Hprof*>(arg);
-    hprof->VisitRoot(obj);
+    DCHECK(arg != NULL);
+    reinterpret_cast<Hprof*>(arg)->VisitRoot(obj);
+    return obj;
   }
 
   static void HeapBitmapCallback(mirror::Object* obj, void* arg)
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 8af4d7e..2bd8353 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -40,7 +40,7 @@
   CHECK_LE(initialCount, maxCount);
   CHECK_NE(desiredKind, kSirtOrInvalid);
 
-  table_ = reinterpret_cast<const mirror::Object**>(malloc(initialCount * sizeof(const mirror::Object*)));
+  table_ = reinterpret_cast<mirror::Object**>(malloc(initialCount * sizeof(const mirror::Object*)));
   CHECK(table_ != NULL);
   memset(table_, 0xd1, initialCount * sizeof(const mirror::Object*));
 
@@ -75,7 +75,7 @@
   return true;
 }
 
-IndirectRef IndirectReferenceTable::Add(uint32_t cookie, const mirror::Object* obj) {
+IndirectRef IndirectReferenceTable::Add(uint32_t cookie, mirror::Object* obj) {
   IRTSegmentState prevState;
   prevState.all = cookie;
   size_t topIndex = segment_state_.parts.topIndex;
@@ -101,7 +101,7 @@
     }
     DCHECK_GT(newSize, alloc_entries_);
 
-    table_ = reinterpret_cast<const mirror::Object**>(realloc(table_, newSize * sizeof(const mirror::Object*)));
+    table_ = reinterpret_cast<mirror::Object**>(realloc(table_, newSize * sizeof(mirror::Object*)));
     slot_data_ = reinterpret_cast<IndirectRefSlot*>(realloc(slot_data_,
                                                             newSize * sizeof(IndirectRefSlot)));
     if (table_ == NULL || slot_data_ == NULL) {
@@ -126,7 +126,7 @@
   if (numHoles > 0) {
     DCHECK_GT(topIndex, 1U);
     // Find the first hole; likely to be near the end of the list.
-    const mirror::Object** pScan = &table_[topIndex - 1];
+    mirror::Object** pScan = &table_[topIndex - 1];
     DCHECK(*pScan != NULL);
     while (*--pScan != NULL) {
       DCHECK_GE(pScan, table_ + prevState.parts.topIndex);
@@ -194,7 +194,8 @@
   return true;
 }
 
-static int Find(mirror::Object* direct_pointer, int bottomIndex, int topIndex, const mirror::Object** table) {
+static int Find(mirror::Object* direct_pointer, int bottomIndex, int topIndex,
+                mirror::Object** table) {
   for (int i = bottomIndex; i < topIndex; ++i) {
     if (table[i] == direct_pointer) {
       return i;
@@ -310,13 +311,14 @@
 
 void IndirectReferenceTable::VisitRoots(RootVisitor* visitor, void* arg) {
   for (auto ref : *this) {
-    visitor(*ref, arg);
+    *ref = visitor(const_cast<mirror::Object*>(*ref), arg);
+    DCHECK(*ref != nullptr);
   }
 }
 
 void IndirectReferenceTable::Dump(std::ostream& os) const {
   os << kind_ << " table dump:\n";
-  std::vector<const mirror::Object*> entries(table_, table_ + Capacity());
+  ReferenceTable::Table entries(table_, table_ + Capacity());
   // Remove NULLs.
   for (int i = entries.size() - 1; i >= 0; --i) {
     if (entries[i] == NULL) {
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 26f53db..51b238c 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -206,7 +206,7 @@
 
 class IrtIterator {
  public:
-  explicit IrtIterator(const mirror::Object** table, size_t i, size_t capacity)
+  explicit IrtIterator(mirror::Object** table, size_t i, size_t capacity)
       : table_(table), i_(i), capacity_(capacity) {
     SkipNullsAndTombstones();
   }
@@ -217,7 +217,7 @@
     return *this;
   }
 
-  const mirror::Object** operator*() {
+  mirror::Object** operator*() {
     return &table_[i_];
   }
 
@@ -233,7 +233,7 @@
     }
   }
 
-  const mirror::Object** table_;
+  mirror::Object** table_;
   size_t i_;
   size_t capacity_;
 };
@@ -258,7 +258,7 @@
    * Returns NULL if the table is full (max entries reached, or alloc
    * failed during expansion).
    */
-  IndirectRef Add(uint32_t cookie, const mirror::Object* obj)
+  IndirectRef Add(uint32_t cookie, mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
@@ -266,7 +266,7 @@
    *
    * Returns kInvalidIndirectRefObject if iref is invalid.
    */
-  const mirror::Object* Get(IndirectRef iref) const {
+  mirror::Object* Get(IndirectRef iref) const {
     if (!GetChecked(iref)) {
       return kInvalidIndirectRefObject;
     }
@@ -363,7 +363,7 @@
   IRTSegmentState segment_state_;
 
   /* bottom of the stack */
-  const mirror::Object** table_;
+  mirror::Object** table_;
   /* bit mask, ORed into all irefs */
   IndirectRefKind kind_;
   /* extended debugging info */
diff --git a/runtime/indirect_reference_table_test.cc b/runtime/indirect_reference_table_test.cc
index bd2890c..b6c6cb4 100644
--- a/runtime/indirect_reference_table_test.cc
+++ b/runtime/indirect_reference_table_test.cc
@@ -17,6 +17,7 @@
 #include "common_test.h"
 
 #include "indirect_reference_table.h"
+#include "mirror/object-inl.h"
 
 namespace art {
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 6caad01..481cbad 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -41,6 +41,11 @@
 namespace art {
 namespace instrumentation {
 
+// Do we want to deoptimize for method entry and exit listeners or just try to intercept
+// invocations? Deoptimization forces all code to run in the interpreter and considerably hurts the
+// application's performance.
+static constexpr bool kDeoptimizeForAccurateMethodEntryExitListeners = false;
+
 static bool InstallStubsClassVisitor(mirror::Class* klass, void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   Instrumentation* instrumentation = reinterpret_cast<Instrumentation*>(arg);
@@ -264,12 +269,14 @@
   bool require_interpreter = false;
   if ((events & kMethodEntered) != 0) {
     method_entry_listeners_.push_back(listener);
-    require_entry_exit_stubs = true;
+    require_interpreter = kDeoptimizeForAccurateMethodEntryExitListeners;
+    require_entry_exit_stubs = !kDeoptimizeForAccurateMethodEntryExitListeners;
     have_method_entry_listeners_ = true;
   }
   if ((events & kMethodExited) != 0) {
     method_exit_listeners_.push_back(listener);
-    require_entry_exit_stubs = true;
+    require_interpreter = kDeoptimizeForAccurateMethodEntryExitListeners;
+    require_entry_exit_stubs = !kDeoptimizeForAccurateMethodEntryExitListeners;
     have_method_exit_listeners_ = true;
   }
   if ((events & kMethodUnwind) != 0) {
@@ -300,7 +307,10 @@
       method_entry_listeners_.remove(listener);
     }
     have_method_entry_listeners_ = method_entry_listeners_.size() > 0;
-    require_entry_exit_stubs |= have_method_entry_listeners_;
+    require_entry_exit_stubs |= have_method_entry_listeners_ &&
+        !kDeoptimizeForAccurateMethodEntryExitListeners;
+    require_interpreter = have_method_entry_listeners_ &&
+        kDeoptimizeForAccurateMethodEntryExitListeners;
   }
   if ((events & kMethodExited) != 0) {
     bool contains = std::find(method_exit_listeners_.begin(), method_exit_listeners_.end(),
@@ -309,7 +319,10 @@
       method_exit_listeners_.remove(listener);
     }
     have_method_exit_listeners_ = method_exit_listeners_.size() > 0;
-    require_entry_exit_stubs |= have_method_exit_listeners_;
+    require_entry_exit_stubs |= have_method_exit_listeners_ &&
+        !kDeoptimizeForAccurateMethodEntryExitListeners;
+    require_interpreter = have_method_exit_listeners_ &&
+        kDeoptimizeForAccurateMethodEntryExitListeners;
   }
   if ((events & kMethodUnwind) != 0) {
     method_unwind_listeners_.remove(listener);
@@ -455,7 +468,7 @@
 void Instrumentation::ExceptionCaughtEvent(Thread* thread, const ThrowLocation& throw_location,
                                            mirror::ArtMethod* catch_method,
                                            uint32_t catch_dex_pc,
-                                           mirror::Throwable* exception_object) {
+                                           mirror::Throwable* exception_object) const {
   if (have_exception_caught_listeners_) {
     DCHECK_EQ(thread->GetException(NULL), exception_object);
     thread->ClearException();
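The net effect of kDeoptimizeForAccurateMethodEntryExitListeners is an either/or choice per listener kind: force everything into the interpreter (accurate but slow) or patch entry/exit stubs into compiled code (fast). A condensed, self-contained sketch of that selection logic; the function name and out-parameters are illustrative, not the real Instrumentation API:

  #include <cstdio>

  static constexpr bool kDeoptimizeForAccurateMethodEntryExitListeners = false;

  // Mirrors the branches added above: exactly one of the two mechanisms is
  // requested whenever entry or exit listeners are installed.
  static void ChooseMechanism(bool have_entry_or_exit_listeners,
                              bool* require_interpreter,
                              bool* require_entry_exit_stubs) {
    *require_interpreter = have_entry_or_exit_listeners &&
        kDeoptimizeForAccurateMethodEntryExitListeners;
    *require_entry_exit_stubs = have_entry_or_exit_listeners &&
        !kDeoptimizeForAccurateMethodEntryExitListeners;
  }

  int main() {
    bool interpreter, stubs;
    ChooseMechanism(true, &interpreter, &stubs);
    std::printf("interpreter=%d stubs=%d\n", interpreter, stubs);  // stubs=1
  }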
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 6c80b41..28f9555 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -186,7 +186,7 @@
   // Inform listeners that an exception was caught.
   void ExceptionCaughtEvent(Thread* thread, const ThrowLocation& throw_location,
                             mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
-                            mirror::Throwable* exception_object)
+                            mirror::Throwable* exception_object) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Called when an instrumented method is entered. The intended link register (lr) is saved so
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 2072979..8f9e072 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -47,15 +47,16 @@
                              bool only_dirty, bool clean_dirty) {
   MutexLock mu(Thread::Current(), intern_table_lock_);
   if (!only_dirty || is_dirty_) {
-    for (const auto& strong_intern : strong_interns_) {
-      visitor(strong_intern.second, arg);
+    for (auto& strong_intern : strong_interns_) {
+      strong_intern.second = reinterpret_cast<mirror::String*>(visitor(strong_intern.second, arg));
+      DCHECK(strong_intern.second != nullptr);
     }
+
     if (clean_dirty) {
       is_dirty_ = false;
     }
   }
-  // Note: we deliberately don't visit the weak_interns_ table and the immutable
-  // image roots.
+  // Note: we deliberately don't visit the weak_interns_ table and the immutable image roots.
 }
 
 mirror::String* InternTable::Lookup(Table& table, mirror::String* s,
@@ -216,14 +217,16 @@
   return found == s;
 }
 
-void InternTable::SweepInternTableWeaks(IsMarkedTester is_marked, void* arg) {
+void InternTable::SweepInternTableWeaks(RootVisitor visitor, void* arg) {
   MutexLock mu(Thread::Current(), intern_table_lock_);
-  // TODO: std::remove_if + lambda.
   for (auto it = weak_interns_.begin(), end = weak_interns_.end(); it != end;) {
     mirror::Object* object = it->second;
-    if (!is_marked(object, arg)) {
+    mirror::Object* new_object = visitor(object, arg);
+    if (new_object == nullptr) {
+      // TODO: use it = weak_interns_.erase(it) when we get a C++11 STL.

       weak_interns_.erase(it++);
     } else {
+      it->second = down_cast<mirror::String*>(new_object);
       ++it;
     }
   }
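The sweeping contract above generalizes the old boolean IsMarkedTester: the visitor returns nullptr for a dead object, or a possibly relocated pointer for a live one, and the table must erase or update accordingly. A toy version of that contract over a std::map, with a stand-in String type:

  #include <cstdint>
  #include <map>

  struct String {};  // stand-in for mirror::String
  typedef String* (*WeakVisitor)(String* obj, void* arg);

  // Erase entries whose objects died (visitor returned nullptr); update
  // entries whose objects were relocated by a moving collector.
  void SweepWeakTable(std::map<int32_t, String*>& table, WeakVisitor visitor, void* arg) {
    for (std::map<int32_t, String*>::iterator it = table.begin(); it != table.end();) {
      String* new_object = visitor(it->second, arg);
      if (new_object == nullptr) {
        table.erase(it++);  // same pre-C++11 erase idiom as the code above
      } else {
        it->second = new_object;
        ++it;
      }
    }
  }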
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index e68af90..eec63c8 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -55,8 +55,7 @@
   // Interns a potentially new string in the 'weak' table. (See above.)
   mirror::String* InternWeak(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SweepInternTableWeaks(IsMarkedTester is_marked, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void SweepInternTableWeaks(RootVisitor visitor, void* arg);
 
   bool ContainsWeak(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index d79d2c4..aa2502d 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -81,8 +81,11 @@
   mutable std::vector<const mirror::String*> expected_;
 };
 
-bool IsMarked(const mirror::Object* object, void* arg) {
-  return reinterpret_cast<TestPredicate*>(arg)->IsMarked(object);
+mirror::Object* IsMarkedSweepingVisitor(mirror::Object* object, void* arg) {
+  if (reinterpret_cast<TestPredicate*>(arg)->IsMarked(object)) {
+    return object;
+  }
+  return nullptr;
 }
 
 TEST_F(InternTableTest, SweepInternTableWeaks) {
@@ -105,7 +108,7 @@
   p.Expect(s1.get());
   {
     ReaderMutexLock mu(soa.Self(), *Locks::heap_bitmap_lock_);
-    t.SweepInternTableWeaks(IsMarked, &p);
+    t.SweepInternTableWeaks(IsMarkedSweepingVisitor, &p);
   }
 
   EXPECT_EQ(2U, t.Size());
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 8cd2ac8..48c0014 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -14,153 +14,11 @@
  * limitations under the License.
  */
 
-#include "interpreter.h"
-
-#include <math.h>
-
-#include "base/logging.h"
-#include "class_linker-inl.h"
-#include "common_throws.h"
-#include "dex_file-inl.h"
-#include "dex_instruction-inl.h"
-#include "dex_instruction.h"
-#include "entrypoints/entrypoint_utils.h"
-#include "gc/accounting/card_table-inl.h"
-#include "invoke_arg_array_builder.h"
-#include "nth_caller_visitor.h"
-#include "mirror/art_field-inl.h"
-#include "mirror/art_method.h"
-#include "mirror/art_method-inl.h"
-#include "mirror/class.h"
-#include "mirror/class-inl.h"
-#include "mirror/object-inl.h"
-#include "mirror/object_array-inl.h"
-#include "object_utils.h"
-#include "ScopedLocalRef.h"
-#include "scoped_thread_state_change.h"
-#include "thread.h"
-#include "well_known_classes.h"
-
-using ::art::mirror::ArtField;
-using ::art::mirror::ArtMethod;
-using ::art::mirror::Array;
-using ::art::mirror::BooleanArray;
-using ::art::mirror::ByteArray;
-using ::art::mirror::CharArray;
-using ::art::mirror::Class;
-using ::art::mirror::ClassLoader;
-using ::art::mirror::IntArray;
-using ::art::mirror::LongArray;
-using ::art::mirror::Object;
-using ::art::mirror::ObjectArray;
-using ::art::mirror::ShortArray;
-using ::art::mirror::String;
-using ::art::mirror::Throwable;
+#include "interpreter_common.h"
 
 namespace art {
-
 namespace interpreter {
 
-static const int32_t kMaxInt = std::numeric_limits<int32_t>::max();
-static const int32_t kMinInt = std::numeric_limits<int32_t>::min();
-static const int64_t kMaxLong = std::numeric_limits<int64_t>::max();
-static const int64_t kMinLong = std::numeric_limits<int64_t>::min();
-
-static void UnstartedRuntimeInvoke(Thread* self, MethodHelper& mh,
-                                   const DexFile::CodeItem* code_item, ShadowFrame* shadow_frame,
-                                   JValue* result, size_t arg_offset)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  // In a runtime that's not started, we intercept certain methods to avoid complicated dependency
-  // problems in core libraries.
-  std::string name(PrettyMethod(shadow_frame->GetMethod()));
-  if (name == "java.lang.Class java.lang.Class.forName(java.lang.String)") {
-    std::string descriptor(DotToDescriptor(shadow_frame->GetVRegReference(arg_offset)->AsString()->ToModifiedUtf8().c_str()));
-    ClassLoader* class_loader = NULL;  // shadow_frame.GetMethod()->GetDeclaringClass()->GetClassLoader();
-    Class* found = Runtime::Current()->GetClassLinker()->FindClass(descriptor.c_str(),
-                                                                   class_loader);
-    CHECK(found != NULL) << "Class.forName failed in un-started runtime for class: "
-        << PrettyDescriptor(descriptor);
-    result->SetL(found);
-  } else if (name == "java.lang.Object java.lang.Class.newInstance()") {
-    Class* klass = shadow_frame->GetVRegReference(arg_offset)->AsClass();
-    ArtMethod* c = klass->FindDeclaredDirectMethod("<init>", "()V");
-    CHECK(c != NULL);
-    SirtRef<Object> obj(self, klass->AllocObject(self));
-    CHECK(obj.get() != NULL);
-    EnterInterpreterFromInvoke(self, c, obj.get(), NULL, NULL);
-    result->SetL(obj.get());
-  } else if (name == "java.lang.reflect.Field java.lang.Class.getDeclaredField(java.lang.String)") {
-    // Special managed code cut-out to allow field lookup in an un-started runtime that'd fail
-    // going the reflective Dex way.
-    Class* klass = shadow_frame->GetVRegReference(arg_offset)->AsClass();
-    String* name = shadow_frame->GetVRegReference(arg_offset + 1)->AsString();
-    ArtField* found = NULL;
-    FieldHelper fh;
-    ObjectArray<ArtField>* fields = klass->GetIFields();
-    for (int32_t i = 0; i < fields->GetLength() && found == NULL; ++i) {
-      ArtField* f = fields->Get(i);
-      fh.ChangeField(f);
-      if (name->Equals(fh.GetName())) {
-        found = f;
-      }
-    }
-    if (found == NULL) {
-      fields = klass->GetSFields();
-      for (int32_t i = 0; i < fields->GetLength() && found == NULL; ++i) {
-        ArtField* f = fields->Get(i);
-        fh.ChangeField(f);
-        if (name->Equals(fh.GetName())) {
-          found = f;
-        }
-      }
-    }
-    CHECK(found != NULL)
-      << "Failed to find field in Class.getDeclaredField in un-started runtime. name="
-      << name->ToModifiedUtf8() << " class=" << PrettyDescriptor(klass);
-    // TODO: getDeclaredField calls GetType once the field is found to ensure a
-    //       NoClassDefFoundError is thrown if the field's type cannot be resolved.
-    Class* jlr_Field = self->DecodeJObject(WellKnownClasses::java_lang_reflect_Field)->AsClass();
-    SirtRef<Object> field(self, jlr_Field->AllocObject(self));
-    CHECK(field.get() != NULL);
-    ArtMethod* c = jlr_Field->FindDeclaredDirectMethod("<init>", "(Ljava/lang/reflect/ArtField;)V");
-    uint32_t args[1];
-    args[0] = reinterpret_cast<uint32_t>(found);
-    EnterInterpreterFromInvoke(self, c, field.get(), args, NULL);
-    result->SetL(field.get());
-  } else if (name == "void java.lang.System.arraycopy(java.lang.Object, int, java.lang.Object, int, int)" ||
-             name == "void java.lang.System.arraycopy(char[], int, char[], int, int)") {
-    // Special case array copying without initializing System.
-    Class* ctype = shadow_frame->GetVRegReference(arg_offset)->GetClass()->GetComponentType();
-    jint srcPos = shadow_frame->GetVReg(arg_offset + 1);
-    jint dstPos = shadow_frame->GetVReg(arg_offset + 3);
-    jint length = shadow_frame->GetVReg(arg_offset + 4);
-    if (!ctype->IsPrimitive()) {
-      ObjectArray<Object>* src = shadow_frame->GetVRegReference(arg_offset)->AsObjectArray<Object>();
-      ObjectArray<Object>* dst = shadow_frame->GetVRegReference(arg_offset + 2)->AsObjectArray<Object>();
-      for (jint i = 0; i < length; ++i) {
-        dst->Set(dstPos + i, src->Get(srcPos + i));
-      }
-    } else if (ctype->IsPrimitiveChar()) {
-      CharArray* src = shadow_frame->GetVRegReference(arg_offset)->AsCharArray();
-      CharArray* dst = shadow_frame->GetVRegReference(arg_offset + 2)->AsCharArray();
-      for (jint i = 0; i < length; ++i) {
-        dst->Set(dstPos + i, src->Get(srcPos + i));
-      }
-    } else if (ctype->IsPrimitiveInt()) {
-      IntArray* src = shadow_frame->GetVRegReference(arg_offset)->AsIntArray();
-      IntArray* dst = shadow_frame->GetVRegReference(arg_offset + 2)->AsIntArray();
-      for (jint i = 0; i < length; ++i) {
-        dst->Set(dstPos + i, src->Get(srcPos + i));
-      }
-    } else {
-      UNIMPLEMENTED(FATAL) << "System.arraycopy of unexpected type: " << PrettyDescriptor(ctype);
-    }
-  } else {
-    // Not special, continue with regular interpreter execution.
-    artInterpreterToInterpreterBridge(self, mh, code_item, shadow_frame, result);
-  }
-}
-
 // Hand-select a number of methods to be run in a not-yet-started runtime without using JNI.
 static void UnstartedRuntimeJni(Thread* self, ArtMethod* method,
                                 Object* receiver, uint32_t* args, JValue* result)
@@ -224,7 +82,7 @@
   }
 }
 
-static void InterpreterJni(Thread* self, ArtMethod* method, StringPiece shorty,
+static void InterpreterJni(Thread* self, ArtMethod* method, const StringPiece& shorty,
                            Object* receiver, uint32_t* args, JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // TODO: The following enters JNI code using a typedef-ed function rather than the JNI compiler,
@@ -406,2703 +264,12 @@
   }
 }
 
-static void DoMonitorEnter(Thread* self, Object* ref) NO_THREAD_SAFETY_ANALYSIS {
-  ref->MonitorEnter(self);
-}
+enum InterpreterImplKind {
+  kSwitchImpl,            // switch-based interpreter implementation.
+  kComputedGotoImplKind   // computed-goto-based interpreter implementation.
+};
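The two InterpreterImplKind values name the classic dispatch strategies for a bytecode loop. A toy illustration of both, assuming GCC/Clang's labels-as-values extension for the computed-goto variant; this sketches the general technique, not the real interpreter:

  #include <cstdio>

  enum Op { OP_INC, OP_DEC, OP_HALT };

  // Switch-based dispatch: one shared indirect branch at the top of the loop.
  int RunSwitch(const Op* pc) {
    int acc = 0;
    for (;;) {
      switch (*pc++) {
        case OP_INC: ++acc; break;
        case OP_DEC: --acc; break;
        case OP_HALT: return acc;
      }
    }
  }

  // Computed-goto dispatch: each handler jumps straight to the next handler,
  // giving the branch predictor a distinct indirect branch per opcode.
  int RunGoto(const Op* pc) {
    static void* const targets[] = { &&inc, &&dec, &&halt };
    int acc = 0;
    goto *targets[*pc++];
   inc:  ++acc; goto *targets[*pc++];
   dec:  --acc; goto *targets[*pc++];
   halt: return acc;
  }

  int main() {
    const Op prog[] = { OP_INC, OP_INC, OP_DEC, OP_HALT };
    std::printf("%d %d\n", RunSwitch(prog), RunGoto(prog));  // prints "1 1"
  }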
 
-static void DoMonitorExit(Thread* self, Object* ref) NO_THREAD_SAFETY_ANALYSIS {
-  ref->MonitorExit(self);
-}
-
-// TODO: should be SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) which is failing due to template
-// specialization.
-template<InvokeType type, bool is_range, bool do_access_check>
-static bool DoInvoke(Thread* self, ShadowFrame& shadow_frame,
-                     const Instruction* inst, JValue* result) NO_THREAD_SAFETY_ANALYSIS;
-
-template<InvokeType type, bool is_range, bool do_access_check>
-static bool DoInvoke(Thread* self, ShadowFrame& shadow_frame,
-                     const Instruction* inst, JValue* result) {
-  bool do_assignability_check = do_access_check;
-  uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
-  uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
-  Object* receiver = (type == kStatic) ? NULL : shadow_frame.GetVRegReference(vregC);
-  ArtMethod* method = FindMethodFromCode(method_idx, receiver, shadow_frame.GetMethod(), self,
-                                         do_access_check, type);
-  if (UNLIKELY(method == NULL)) {
-    CHECK(self->IsExceptionPending());
-    result->SetJ(0);
-    return false;
-  } else if (UNLIKELY(method->IsAbstract())) {
-    ThrowAbstractMethodError(method);
-    result->SetJ(0);
-    return false;
-  }
-
-  MethodHelper mh(method);
-  const DexFile::CodeItem* code_item = mh.GetCodeItem();
-  uint16_t num_regs;
-  uint16_t num_ins;
-  if (LIKELY(code_item != NULL)) {
-    num_regs = code_item->registers_size_;
-    num_ins = code_item->ins_size_;
-  } else {
-    DCHECK(method->IsNative() || method->IsProxyMethod());
-    num_regs = num_ins = ArtMethod::NumArgRegisters(mh.GetShorty());
-    if (!method->IsStatic()) {
-      num_regs++;
-      num_ins++;
-    }
-  }
-
-  void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
-  ShadowFrame* new_shadow_frame(ShadowFrame::Create(num_regs, &shadow_frame, method, 0, memory));
-  size_t cur_reg = num_regs - num_ins;
-  if (receiver != NULL) {
-    new_shadow_frame->SetVRegReference(cur_reg, receiver);
-    ++cur_reg;
-  }
-
-  const DexFile::TypeList* params;
-  if (do_assignability_check) {
-    params = mh.GetParameterTypeList();
-  }
-  size_t arg_offset = (receiver == NULL) ? 0 : 1;
-  const char* shorty = mh.GetShorty();
-  uint32_t arg[5];
-  if (!is_range) {
-    inst->GetArgs(arg);
-  }
-  for (size_t shorty_pos = 0; cur_reg < num_regs; ++shorty_pos, cur_reg++, arg_offset++) {
-    DCHECK_LT(shorty_pos + 1, mh.GetShortyLength());
-    size_t arg_pos = is_range ? vregC + arg_offset : arg[arg_offset];
-    switch (shorty[shorty_pos + 1]) {
-      case 'L': {
-        Object* o = shadow_frame.GetVRegReference(arg_pos);
-        if (do_assignability_check && o != NULL) {
-          Class* arg_type = mh.GetClassFromTypeIdx(params->GetTypeItem(shorty_pos).type_idx_);
-          if (arg_type == NULL) {
-            CHECK(self->IsExceptionPending());
-            return false;
-          }
-          if (!o->VerifierInstanceOf(arg_type)) {
-            // This should never happen.
-            self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
-                                     "Ljava/lang/VirtualMachineError;",
-                                     "Invoking %s with bad arg %d, type '%s' not instance of '%s'",
-                                     mh.GetName(), shorty_pos,
-                                     ClassHelper(o->GetClass()).GetDescriptor(),
-                                     ClassHelper(arg_type).GetDescriptor());
-            return false;
-          }
-        }
-        new_shadow_frame->SetVRegReference(cur_reg, o);
-        break;
-      }
-      case 'J': case 'D': {
-        uint64_t wide_value = (static_cast<uint64_t>(shadow_frame.GetVReg(arg_pos + 1)) << 32) |
-                              static_cast<uint32_t>(shadow_frame.GetVReg(arg_pos));
-        new_shadow_frame->SetVRegLong(cur_reg, wide_value);
-        cur_reg++;
-        arg_offset++;
-        break;
-      }
-      default:
-        new_shadow_frame->SetVReg(cur_reg, shadow_frame.GetVReg(arg_pos));
-        break;
-    }
-  }
-
-  if (LIKELY(Runtime::Current()->IsStarted())) {
-    (method->GetEntryPointFromInterpreter())(self, mh, code_item, new_shadow_frame, result);
-  } else {
-    UnstartedRuntimeInvoke(self, mh, code_item, new_shadow_frame, result, num_regs - num_ins);
-  }
-  return !self->IsExceptionPending();
-}
-
-// TODO: should be SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) which is failing due to template
-// specialization.
-template<bool is_range>
-static bool DoInvokeVirtualQuick(Thread* self, ShadowFrame& shadow_frame,
-                                 const Instruction* inst, JValue* result)
-    NO_THREAD_SAFETY_ANALYSIS;
-
-template<bool is_range>
-static bool DoInvokeVirtualQuick(Thread* self, ShadowFrame& shadow_frame,
-                                 const Instruction* inst, JValue* result) {
-  uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
-  Object* receiver = shadow_frame.GetVRegReference(vregC);
-  if (UNLIKELY(receiver == NULL)) {
-    // We lost the reference to the method index so we cannot get a more
-    // precise exception message.
-    ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-    return false;
-  }
-  uint32_t vtable_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
-  // TODO: use ObjectArray<T>::GetWithoutChecks?
-  ArtMethod* method = receiver->GetClass()->GetVTable()->Get(vtable_idx);
-  if (UNLIKELY(method == NULL)) {
-    CHECK(self->IsExceptionPending());
-    result->SetJ(0);
-    return false;
-  } else if (UNLIKELY(method->IsAbstract())) {
-    ThrowAbstractMethodError(method);
-    result->SetJ(0);
-    return false;
-  }
-
-  MethodHelper mh(method);
-  const DexFile::CodeItem* code_item = mh.GetCodeItem();
-  uint16_t num_regs;
-  uint16_t num_ins;
-  if (code_item != NULL) {
-    num_regs = code_item->registers_size_;
-    num_ins = code_item->ins_size_;
-  } else {
-    DCHECK(method->IsNative() || method->IsProxyMethod());
-    num_regs = num_ins = ArtMethod::NumArgRegisters(mh.GetShorty());
-    if (!method->IsStatic()) {
-      num_regs++;
-      num_ins++;
-    }
-  }
-
-  void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
-  ShadowFrame* new_shadow_frame(ShadowFrame::Create(num_regs, &shadow_frame,
-                                                    method, 0, memory));
-  size_t cur_reg = num_regs - num_ins;
-  if (receiver != NULL) {
-    new_shadow_frame->SetVRegReference(cur_reg, receiver);
-    ++cur_reg;
-  }
-
-  size_t arg_offset = (receiver == NULL) ? 0 : 1;
-  const char* shorty = mh.GetShorty();
-  uint32_t arg[5];
-  if (!is_range) {
-    inst->GetArgs(arg);
-  }
-  for (size_t shorty_pos = 0; cur_reg < num_regs; ++shorty_pos, cur_reg++, arg_offset++) {
-    DCHECK_LT(shorty_pos + 1, mh.GetShortyLength());
-    size_t arg_pos = is_range ? vregC + arg_offset : arg[arg_offset];
-    switch (shorty[shorty_pos + 1]) {
-      case 'L': {
-        Object* o = shadow_frame.GetVRegReference(arg_pos);
-        new_shadow_frame->SetVRegReference(cur_reg, o);
-        break;
-      }
-      case 'J': case 'D': {
-        uint64_t wide_value = (static_cast<uint64_t>(shadow_frame.GetVReg(arg_pos + 1)) << 32) |
-                              static_cast<uint32_t>(shadow_frame.GetVReg(arg_pos));
-        new_shadow_frame->SetVRegLong(cur_reg, wide_value);
-        cur_reg++;
-        arg_offset++;
-        break;
-      }
-      default:
-        new_shadow_frame->SetVReg(cur_reg, shadow_frame.GetVReg(arg_pos));
-        break;
-    }
-  }
-
-  if (LIKELY(Runtime::Current()->IsStarted())) {
-    (method->GetEntryPointFromInterpreter())(self, mh, code_item, new_shadow_frame, result);
-  } else {
-    UnstartedRuntimeInvoke(self, mh, code_item, new_shadow_frame, result, num_regs - num_ins);
-  }
-  return !self->IsExceptionPending();
-}
-
-// We use template functions to improve compiler inlining. Otherwise, some
-// parts of the code (like a switch statement) which depend on a constant
-// parameter would not be inlined when they should be. These constant
-// parameters are now part of the template arguments.
-// Note these template functions are static and inlined, so they should not be
-// part of the final object file.
-// TODO: should be SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) which is failing due to template
-// specialization.
-template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
-static bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame,
-                       const Instruction* inst)
-    NO_THREAD_SAFETY_ANALYSIS ALWAYS_INLINE;
-
-template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
-static inline bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame,
-                              const Instruction* inst) {
-  bool is_static = (find_type == StaticObjectRead) || (find_type == StaticPrimitiveRead);
-  uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
-  ArtField* f = FindFieldFromCode(field_idx, shadow_frame.GetMethod(), self,
-                                  find_type, Primitive::FieldSize(field_type),
-                                  do_access_check);
-  if (UNLIKELY(f == NULL)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-  Object* obj;
-  if (is_static) {
-    obj = f->GetDeclaringClass();
-  } else {
-    obj = shadow_frame.GetVRegReference(inst->VRegB_22c());
-    if (UNLIKELY(obj == NULL)) {
-      ThrowNullPointerExceptionForFieldAccess(shadow_frame.GetCurrentLocationForThrow(), f, true);
-      return false;
-    }
-  }
-  uint32_t vregA = is_static ? inst->VRegA_21c() : inst->VRegA_22c();
-  switch (field_type) {
-    case Primitive::kPrimBoolean:
-      shadow_frame.SetVReg(vregA, f->GetBoolean(obj));
-      break;
-    case Primitive::kPrimByte:
-      shadow_frame.SetVReg(vregA, f->GetByte(obj));
-      break;
-    case Primitive::kPrimChar:
-      shadow_frame.SetVReg(vregA, f->GetChar(obj));
-      break;
-    case Primitive::kPrimShort:
-      shadow_frame.SetVReg(vregA, f->GetShort(obj));
-      break;
-    case Primitive::kPrimInt:
-      shadow_frame.SetVReg(vregA, f->GetInt(obj));
-      break;
-    case Primitive::kPrimLong:
-      shadow_frame.SetVRegLong(vregA, f->GetLong(obj));
-      break;
-    case Primitive::kPrimNot:
-      shadow_frame.SetVRegReference(vregA, f->GetObject(obj));
-      break;
-    default:
-      LOG(FATAL) << "Unreachable: " << field_type;
-  }
-  return true;
-}
-
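The "template functions to improve compiler inlining" comment above DoFieldGet describes a standard trick: a runtime flag hoisted into a template parameter becomes a compile-time constant in each instantiation, so the branches that depend on it fold away. A stripped-down illustration of the idea, unrelated to the real field accessors:

  #include <cstdio>

  // do_access_check is a template parameter, so in each instantiation the
  // branch below is a compile-time constant and the untaken side is deleted.
  template<bool do_access_check>
  static inline int Load(const int* data, int i, int size) {
    if (do_access_check) {
      if (i < 0 || i >= size) {
        return -1;  // checked variant rejects out-of-range indices
      }
    }
    return data[i];
  }

  int main() {
    const int data[] = { 10, 20, 30 };
    std::printf("%d\n", Load<true>(data, 5, 3));   // -1
    std::printf("%d\n", Load<false>(data, 1, 3));  // 20
  }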
-// TODO: should be SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) which is failing due to template
-// specialization.
-template<Primitive::Type field_type>
-static bool DoIGetQuick(Thread* self, ShadowFrame& shadow_frame,
-                       const Instruction* inst)
-    NO_THREAD_SAFETY_ANALYSIS ALWAYS_INLINE;
-
-template<Primitive::Type field_type>
-static inline bool DoIGetQuick(Thread* self, ShadowFrame& shadow_frame,
-                               const Instruction* inst) {
-  Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c());
-  if (UNLIKELY(obj == NULL)) {
-    // We lost the reference to the field index so we cannot get a more
-    // precise exception message.
-    ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-    return false;
-  }
-  MemberOffset field_offset(inst->VRegC_22c());
-  const bool is_volatile = false;  // iget-x-quick only on non-volatile fields.
-  const uint32_t vregA = inst->VRegA_22c();
-  switch (field_type) {
-    case Primitive::kPrimInt:
-      shadow_frame.SetVReg(vregA, static_cast<int32_t>(obj->GetField32(field_offset, is_volatile)));
-      break;
-    case Primitive::kPrimLong:
-      shadow_frame.SetVRegLong(vregA, static_cast<int64_t>(obj->GetField64(field_offset, is_volatile)));
-      break;
-    case Primitive::kPrimNot:
-      shadow_frame.SetVRegReference(vregA, obj->GetFieldObject<mirror::Object*>(field_offset, is_volatile));
-      break;
-    default:
-      LOG(FATAL) << "Unreachable: " << field_type;
-  }
-  return true;
-}
-
-// TODO: should be SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) which is failing due to template
-// specialization.
-template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
-static bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame,
-                       const Instruction* inst)
-    NO_THREAD_SAFETY_ANALYSIS ALWAYS_INLINE;
-
-template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
-static inline bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame,
-                              const Instruction* inst) {
-  bool do_assignability_check = do_access_check;
-  bool is_static = (find_type == StaticObjectWrite) || (find_type == StaticPrimitiveWrite);
-  uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
-  ArtField* f = FindFieldFromCode(field_idx, shadow_frame.GetMethod(), self,
-                                  find_type, Primitive::FieldSize(field_type),
-                                  do_access_check);
-  if (UNLIKELY(f == NULL)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-  Object* obj;
-  if (is_static) {
-    obj = f->GetDeclaringClass();
-  } else {
-    obj = shadow_frame.GetVRegReference(inst->VRegB_22c());
-    if (UNLIKELY(obj == NULL)) {
-      ThrowNullPointerExceptionForFieldAccess(shadow_frame.GetCurrentLocationForThrow(),
-                                              f, false);
-      return false;
-    }
-  }
-  uint32_t vregA = is_static ? inst->VRegA_21c() : inst->VRegA_22c();
-  switch (field_type) {
-    case Primitive::kPrimBoolean:
-      f->SetBoolean(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimByte:
-      f->SetByte(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimChar:
-      f->SetChar(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimShort:
-      f->SetShort(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimInt:
-      f->SetInt(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimLong:
-      f->SetLong(obj, shadow_frame.GetVRegLong(vregA));
-      break;
-    case Primitive::kPrimNot: {
-      Object* reg = shadow_frame.GetVRegReference(vregA);
-      if (do_assignability_check && reg != NULL) {
-        Class* field_class = FieldHelper(f).GetType();
-        if (!reg->VerifierInstanceOf(field_class)) {
-          // This should never happen.
-          self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
-                                   "Ljava/lang/VirtualMachineError;",
-                                   "Put '%s' that is not instance of field '%s' in '%s'",
-                                   ClassHelper(reg->GetClass()).GetDescriptor(),
-                                   ClassHelper(field_class).GetDescriptor(),
-                                   ClassHelper(f->GetDeclaringClass()).GetDescriptor());
-          return false;
-        }
-      }
-      f->SetObj(obj, reg);
-      break;
-    }
-    default:
-      LOG(FATAL) << "Unreachable: " << field_type;
-  }
-  return true;
-}
-
-// TODO: should be SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) which is failing due to template
-// specialization.
-template<Primitive::Type field_type>
-static bool DoIPutQuick(Thread* self, ShadowFrame& shadow_frame,
-                       const Instruction* inst)
-    NO_THREAD_SAFETY_ANALYSIS ALWAYS_INLINE;
-
-template<Primitive::Type field_type>
-static inline bool DoIPutQuick(Thread* self, ShadowFrame& shadow_frame,
-                               const Instruction* inst) {
-  Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c());
-  if (UNLIKELY(obj == NULL)) {
-    // We lost the reference to the field index so we cannot get a more
-    // precise exception message.
-    ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-    return false;
-  }
-  MemberOffset field_offset(inst->VRegC_22c());
-  const bool is_volatile = false;  // iput-x-quick only on non-volatile fields.
-  const uint32_t vregA = inst->VRegA_22c();
-  switch (field_type) {
-    case Primitive::kPrimInt:
-      obj->SetField32(field_offset, shadow_frame.GetVReg(vregA), is_volatile);
-      break;
-    case Primitive::kPrimLong:
-      obj->SetField64(field_offset, shadow_frame.GetVRegLong(vregA), is_volatile);
-      break;
-    case Primitive::kPrimNot:
-      obj->SetFieldObject(field_offset, shadow_frame.GetVRegReference(vregA), is_volatile);
-      break;
-    default:
-      LOG(FATAL) << "Unreachable: " << field_type;
-  }
-  return true;
-}
-
-static inline String* ResolveString(Thread* self, MethodHelper& mh, uint32_t string_idx)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  Class* java_lang_string_class = String::GetJavaLangString();
-  if (UNLIKELY(!java_lang_string_class->IsInitialized())) {
-    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    if (UNLIKELY(!class_linker->EnsureInitialized(java_lang_string_class,
-                                                  true, true))) {
-      DCHECK(self->IsExceptionPending());
-      return NULL;
-    }
-  }
-  return mh.ResolveString(string_idx);
-}
-
-static inline bool DoIntDivide(ShadowFrame& shadow_frame, size_t result_reg,
-                               int32_t dividend, int32_t divisor)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (UNLIKELY(divisor == 0)) {
-    ThrowArithmeticExceptionDivideByZero();
-    return false;
-  }
-  if (UNLIKELY(dividend == kMinInt && divisor == -1)) {
-    shadow_frame.SetVReg(result_reg, kMinInt);
-  } else {
-    shadow_frame.SetVReg(result_reg, dividend / divisor);
-  }
-  return true;
-}
-
-static inline bool DoIntRemainder(ShadowFrame& shadow_frame, size_t result_reg,
-                                  int32_t dividend, int32_t divisor)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (UNLIKELY(divisor == 0)) {
-    ThrowArithmeticExceptionDivideByZero();
-    return false;
-  }
-  if (UNLIKELY(dividend == kMinInt && divisor == -1)) {
-    shadow_frame.SetVReg(result_reg, 0);
-  } else {
-    shadow_frame.SetVReg(result_reg, dividend % divisor);
-  }
-  return true;
-}
-
-static inline bool DoLongDivide(ShadowFrame& shadow_frame, size_t result_reg,
-                                int64_t dividend, int64_t divisor)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (UNLIKELY(divisor == 0)) {
-    ThrowArithmeticExceptionDivideByZero();
-    return false;
-  }
-  if (UNLIKELY(dividend == kMinLong && divisor == -1)) {
-    shadow_frame.SetVRegLong(result_reg, kMinLong);
-  } else {
-    shadow_frame.SetVRegLong(result_reg, dividend / divisor);
-  }
-  return true;
-}
-
-static inline bool DoLongRemainder(ShadowFrame& shadow_frame, size_t result_reg,
-                                   int64_t dividend, int64_t divisor)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (UNLIKELY(divisor == 0)) {
-    ThrowArithmeticExceptionDivideByZero();
-    return false;
-  }
-  if (UNLIKELY(dividend == kMinLong && divisor == -1)) {
-    shadow_frame.SetVRegLong(result_reg, 0);
-  } else {
-    shadow_frame.SetVRegLong(result_reg, dividend % divisor);
-  }
-  return true;
-}
-
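All four division/remainder helpers above guard the same corner case: Java semantics require kMinInt / -1 to yield kMinInt (and the remainder to be 0), but this operand pair overflows in two's complement, C++ leaves it undefined, and on x86 the idiv instruction raises a divide fault rather than wrapping. A standalone sketch of the int variant:

  #include <climits>
  #include <cstdint>
  #include <cstdio>

  // Returns false on division by zero (the caller would throw
  // ArithmeticException); otherwise writes the Java-semantics quotient,
  // including the special case INT_MIN / -1 == INT_MIN.
  bool SafeIntDivide(int32_t dividend, int32_t divisor, int32_t* out) {
    if (divisor == 0) {
      return false;
    }
    *out = (dividend == INT_MIN && divisor == -1) ? INT_MIN : dividend / divisor;
    return true;
  }

  int main() {
    int32_t q;
    if (SafeIntDivide(INT_MIN, -1, &q)) {
      std::printf("%d\n", q);  // prints INT_MIN instead of faulting
    }
  }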
-// TODO: should be SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) which is failing due to template
-// specialization.
-// Returns true on success, otherwise throws an exception and returns false.
-template <bool is_range, bool do_access_check>
-static bool DoFilledNewArray(const Instruction* inst, const ShadowFrame& shadow_frame,
-                             Thread* self, JValue* result)
-    NO_THREAD_SAFETY_ANALYSIS ALWAYS_INLINE;
-
-template <bool is_range, bool do_access_check>
-static inline bool DoFilledNewArray(const Instruction* inst,
-                                    const ShadowFrame& shadow_frame,
-                                    Thread* self, JValue* result) {
-  DCHECK(inst->Opcode() == Instruction::FILLED_NEW_ARRAY ||
-         inst->Opcode() == Instruction::FILLED_NEW_ARRAY_RANGE);
-  const int32_t length = is_range ? inst->VRegA_3rc() : inst->VRegA_35c();
-  if (!is_range) {
-    // Check that FILLED_NEW_ARRAY's length does not exceed 5 arguments.
-    CHECK_LE(length, 5);
-  }
-  if (UNLIKELY(length < 0)) {
-    ThrowNegativeArraySizeException(length);
-    return false;
-  }
-  uint16_t type_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
-  Class* arrayClass = ResolveVerifyAndClinit(type_idx, shadow_frame.GetMethod(),
-                                             self, false, do_access_check);
-  if (UNLIKELY(arrayClass == NULL)) {
-    DCHECK(self->IsExceptionPending());
-    return false;
-  }
-  CHECK(arrayClass->IsArrayClass());
-  Class* componentClass = arrayClass->GetComponentType();
-  if (UNLIKELY(componentClass->IsPrimitive() && !componentClass->IsPrimitiveInt())) {
-    if (componentClass->IsPrimitiveLong() || componentClass->IsPrimitiveDouble()) {
-      ThrowRuntimeException("Bad filled array request for type %s",
-                            PrettyDescriptor(componentClass).c_str());
-    } else {
-      self->ThrowNewExceptionF(shadow_frame.GetCurrentLocationForThrow(),
-                               "Ljava/lang/InternalError;",
-                               "Found type %s; filled-new-array not implemented for anything but \'int\'",
-                               PrettyDescriptor(componentClass).c_str());
-    }
-    return false;
-  }
-  Object* newArray = Array::Alloc(self, arrayClass, length);
-  if (UNLIKELY(newArray == NULL)) {
-    DCHECK(self->IsExceptionPending());
-    return false;
-  }
-  if (is_range) {
-    uint32_t vregC = inst->VRegC_3rc();
-    const bool is_primitive_int_component = componentClass->IsPrimitiveInt();
-    for (int32_t i = 0; i < length; ++i) {
-      if (is_primitive_int_component) {
-        newArray->AsIntArray()->Set(i, shadow_frame.GetVReg(vregC + i));
-      } else {
-        newArray->AsObjectArray<Object>()->Set(i, shadow_frame.GetVRegReference(vregC + i));
-      }
-    }
-  } else {
-    uint32_t arg[5];
-    inst->GetArgs(arg);
-    const bool is_primitive_int_component = componentClass->IsPrimitiveInt();
-    for (int32_t i = 0; i < length; ++i) {
-      if (is_primitive_int_component) {
-        newArray->AsIntArray()->Set(i, shadow_frame.GetVReg(arg[i]));
-      } else {
-        newArray->AsObjectArray<Object>()->Set(i, shadow_frame.GetVRegReference(arg[i]));
-      }
-    }
-  }
-
-  result->SetL(newArray);
-  return true;
-}
-
-static inline const Instruction* DoSparseSwitch(const Instruction* inst,
-                                                const ShadowFrame& shadow_frame)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(inst->Opcode() == Instruction::SPARSE_SWITCH);
-  const uint16_t* switch_data = reinterpret_cast<const uint16_t*>(inst) + inst->VRegB_31t();
-  int32_t test_val = shadow_frame.GetVReg(inst->VRegA_31t());
-  DCHECK_EQ(switch_data[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
-  uint16_t size = switch_data[1];
-  DCHECK_GT(size, 0);
-  const int32_t* keys = reinterpret_cast<const int32_t*>(&switch_data[2]);
-  DCHECK(IsAligned<4>(keys));
-  const int32_t* entries = keys + size;
-  DCHECK(IsAligned<4>(entries));
-  int lo = 0;
-  int hi = size - 1;
-  while (lo <= hi) {
-    int mid = (lo + hi) / 2;
-    int32_t foundVal = keys[mid];
-    if (test_val < foundVal) {
-      hi = mid - 1;
-    } else if (test_val > foundVal) {
-      lo = mid + 1;
-    } else {
-      return inst->RelativeAt(entries[mid]);
-    }
-  }
-  return inst->Next_3xx();
-}
-
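DoSparseSwitch above works because a sparse-switch payload stores a sorted key array followed by a parallel target array, so a binary search finds the matching case. A self-contained version of the same search over plain arrays (the data here is made up for illustration):

  #include <cstdint>
  #include <cstdio>

  // Returns the branch target for test_val, or -1 as the fall-through
  // (the analog of inst->Next_3xx() above).
  int32_t SparseSwitchTarget(const int32_t* keys, const int32_t* targets,
                             int size, int32_t test_val) {
    int lo = 0;
    int hi = size - 1;
    while (lo <= hi) {
      int mid = lo + (hi - lo) / 2;  // midpoint without (lo + hi) overflow
      if (test_val < keys[mid]) {
        hi = mid - 1;
      } else if (test_val > keys[mid]) {
        lo = mid + 1;
      } else {
        return targets[mid];
      }
    }
    return -1;
  }

  int main() {
    const int32_t keys[] = { -5, 0, 7, 42 };
    const int32_t targets[] = { 100, 200, 300, 400 };
    std::printf("%d\n", SparseSwitchTarget(keys, targets, 4, 7));  // 300
  }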
-static inline const Instruction* FindNextInstructionFollowingException(Thread* self,
-                                                                       ShadowFrame& shadow_frame,
-                                                                       uint32_t dex_pc,
-                                                                       const uint16_t* insns,
-                                                                       SirtRef<Object>& this_object_ref,
-                                                                       instrumentation::Instrumentation* instrumentation)
-    ALWAYS_INLINE;
-
-static inline const Instruction* FindNextInstructionFollowingException(Thread* self,
-                                                                       ShadowFrame& shadow_frame,
-                                                                       uint32_t dex_pc,
-                                                                       const uint16_t* insns,
-                                                                       SirtRef<Object>& this_object_ref,
-                                                                       instrumentation::Instrumentation* instrumentation)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  self->VerifyStack();
-  ThrowLocation throw_location;
-  mirror::Throwable* exception = self->GetException(&throw_location);
-  bool clear_exception;
-  uint32_t found_dex_pc = shadow_frame.GetMethod()->FindCatchBlock(exception->GetClass(), dex_pc,
-                                                                   &clear_exception);
-  if (found_dex_pc == DexFile::kDexNoIndex) {
-    instrumentation->MethodUnwindEvent(self, this_object_ref.get(),
-                                       shadow_frame.GetMethod(), dex_pc);
-    return NULL;
-  } else {
-    instrumentation->ExceptionCaughtEvent(self, throw_location,
-                                          shadow_frame.GetMethod(),
-                                          found_dex_pc, exception);
-    if (clear_exception) {
-      self->ClearException();
-    }
-    return Instruction::At(insns + found_dex_pc);
-  }
-}
-
-#define HANDLE_PENDING_EXCEPTION() \
-  CHECK(self->IsExceptionPending()); \
-  inst = FindNextInstructionFollowingException(self, shadow_frame, inst->GetDexPc(insns), insns, \
-                                               this_object_ref, instrumentation); \
-  if (inst == NULL) { \
-    return JValue(); /* Handled in caller. */ \
-  }
-
-#define POSSIBLY_HANDLE_PENDING_EXCEPTION(is_exception_pending, next_function) \
-  if (UNLIKELY(is_exception_pending)) { \
-    HANDLE_PENDING_EXCEPTION(); \
-  } else { \
-    inst = inst->next_function(); \
-  }
-
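The two macros above are control-flow helpers rather than expressions: HANDLE_PENDING_EXCEPTION can return out of the enclosing ExecuteImpl, and POSSIBLY_HANDLE_PENDING_EXCEPTION otherwise advances inst via the named successor function. A stand-alone miniature of the same pattern (toy types, not the real Instruction API):

  #include <cstdio>

  struct Inst { int op; };

  static const Inst* NextInstFollowingException(const Inst* inst) {
    return nullptr;  // toy: pretend no catch handler is ever found
  }

  #define HANDLE_PENDING_EXCEPTION()            \
    do {                                        \
      inst = NextInstFollowingException(inst);  \
      if (inst == nullptr) {                    \
        return -1;  /* handled in caller */     \
      }                                         \
    } while (false)

  #define POSSIBLY_HANDLE_PENDING_EXCEPTION(pending)     \
    if (pending) {                                       \
      HANDLE_PENDING_EXCEPTION();                        \
    } else {                                             \
      ++inst;  /* stands in for inst->Next_1xx() etc. */ \
    }

  static int Run(const Inst* inst) {
    bool pending = true;  // toy: the first instruction "throws"
    POSSIBLY_HANDLE_PENDING_EXCEPTION(pending);
    return inst->op;
  }

  int main() {
    Inst prog[] = { {1}, {2} };
    std::printf("%d\n", Run(prog));  // prints -1: unwound to the caller
  }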
-static void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh)
-  __attribute__((cold, noreturn, noinline));
-
-static void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  LOG(FATAL) << "Unexpected instruction: " << inst->DumpString(&mh.GetDexFile());
-  exit(0);  // Unreachable, keep GCC happy.
-}
-
-// Code to run before each dex instruction.
-#define PREAMBLE()
-
-// TODO: should be SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) which is failing due to template
-// specialization.
-template<bool do_access_check>
-static JValue ExecuteImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
-                      ShadowFrame& shadow_frame, JValue result_register)
-    NO_THREAD_SAFETY_ANALYSIS __attribute__((hot));
-
-template<bool do_access_check>
-static JValue ExecuteImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
-                      ShadowFrame& shadow_frame, JValue result_register) {
-  bool do_assignability_check = do_access_check;
-  if (UNLIKELY(!shadow_frame.HasReferenceArray())) {
-    LOG(FATAL) << "Invalid shadow frame for interpreter use";
-    return JValue();
-  }
-  self->VerifyStack();
-  instrumentation::Instrumentation* const instrumentation = Runtime::Current()->GetInstrumentation();
-
-  // As the 'this' object won't change during the execution of the current code, we
-  // want to cache it in local variables. Nevertheless, in order to let the
-  // garbage collector access it, we store it in a SIRT reference.
-  SirtRef<Object> this_object_ref(self, shadow_frame.GetThisObject(code_item->ins_size_));
-
-  uint32_t dex_pc = shadow_frame.GetDexPC();
-  if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing.
-    if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
-      instrumentation->MethodEnterEvent(self, this_object_ref.get(),
-                                        shadow_frame.GetMethod(), 0);
-    }
-  }
-  const uint16_t* const insns = code_item->insns_;
-  const Instruction* inst = Instruction::At(insns + dex_pc);
-  while (true) {
-    dex_pc = inst->GetDexPc(insns);
-    shadow_frame.SetDexPC(dex_pc);
-    if (UNLIKELY(self->TestAllFlags())) {
-      CheckSuspend(self);
-    }
-    if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-      instrumentation->DexPcMovedEvent(self, this_object_ref.get(),
-                                       shadow_frame.GetMethod(), dex_pc);
-    }
-    const bool kTracing = false;
-    if (kTracing) {
-#define TRACE_LOG std::cerr
-      TRACE_LOG << PrettyMethod(shadow_frame.GetMethod())
-                << StringPrintf("\n0x%x: ", dex_pc)
-                << inst->DumpString(&mh.GetDexFile()) << "\n";
-      for (size_t i = 0; i < shadow_frame.NumberOfVRegs(); ++i) {
-        uint32_t raw_value = shadow_frame.GetVReg(i);
-        Object* ref_value = shadow_frame.GetVRegReference(i);
-        TRACE_LOG << StringPrintf(" vreg%d=0x%08X", i, raw_value);
-        if (ref_value != NULL) {
-          if (ref_value->GetClass()->IsStringClass() &&
-              ref_value->AsString()->GetCharArray() != NULL) {
-            TRACE_LOG << "/java.lang.String \"" << ref_value->AsString()->ToModifiedUtf8() << "\"";
-          } else {
-            TRACE_LOG << "/" << PrettyTypeOf(ref_value);
-          }
-        }
-      }
-      TRACE_LOG << "\n";
-#undef TRACE_LOG
-    }
-    switch (inst->Opcode()) {
-      case Instruction::NOP:
-        PREAMBLE();
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::MOVE:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_12x(),
-                             shadow_frame.GetVReg(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::MOVE_FROM16:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22x(),
-                             shadow_frame.GetVReg(inst->VRegB_22x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::MOVE_16:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_32x(),
-                             shadow_frame.GetVReg(inst->VRegB_32x()));
-        inst = inst->Next_3xx();
-        break;
-      case Instruction::MOVE_WIDE:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_12x(),
-                                 shadow_frame.GetVRegLong(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::MOVE_WIDE_FROM16:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_22x(),
-                                 shadow_frame.GetVRegLong(inst->VRegB_22x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::MOVE_WIDE_16:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_32x(),
-                                 shadow_frame.GetVRegLong(inst->VRegB_32x()));
-        inst = inst->Next_3xx();
-        break;
-      case Instruction::MOVE_OBJECT:
-        PREAMBLE();
-        shadow_frame.SetVRegReference(inst->VRegA_12x(),
-                                      shadow_frame.GetVRegReference(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::MOVE_OBJECT_FROM16:
-        PREAMBLE();
-        shadow_frame.SetVRegReference(inst->VRegA_22x(),
-                                      shadow_frame.GetVRegReference(inst->VRegB_22x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::MOVE_OBJECT_16:
-        PREAMBLE();
-        shadow_frame.SetVRegReference(inst->VRegA_32x(),
-                                      shadow_frame.GetVRegReference(inst->VRegB_32x()));
-        inst = inst->Next_3xx();
-        break;
-      case Instruction::MOVE_RESULT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_11x(), result_register.GetI());
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::MOVE_RESULT_WIDE:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_11x(), result_register.GetJ());
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::MOVE_RESULT_OBJECT:
-        PREAMBLE();
-        shadow_frame.SetVRegReference(inst->VRegA_11x(), result_register.GetL());
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::MOVE_EXCEPTION: {
-        PREAMBLE();
-        Throwable* exception = self->GetException(NULL);
-        self->ClearException();
-        shadow_frame.SetVRegReference(inst->VRegA_11x(), exception);
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::RETURN_VOID: {
-        PREAMBLE();
-        JValue result;
-        if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
-          instrumentation->MethodExitEvent(self, this_object_ref.get(),
-                                           shadow_frame.GetMethod(), inst->GetDexPc(insns),
-                                           result);
-        }
-        return result;
-      }
-      case Instruction::RETURN_VOID_BARRIER: {
-        PREAMBLE();
-        ANDROID_MEMBAR_STORE();
-        JValue result;
-        if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
-          instrumentation->MethodExitEvent(self, this_object_ref.get(),
-                                           shadow_frame.GetMethod(), inst->GetDexPc(insns),
-                                           result);
-        }
-        return result;
-      }
-      case Instruction::RETURN: {
-        PREAMBLE();
-        JValue result;
-        result.SetJ(0);
-        result.SetI(shadow_frame.GetVReg(inst->VRegA_11x()));
-        if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
-          instrumentation->MethodExitEvent(self, this_object_ref.get(),
-                                           shadow_frame.GetMethod(), inst->GetDexPc(insns),
-                                           result);
-        }
-        return result;
-      }
-      case Instruction::RETURN_WIDE: {
-        PREAMBLE();
-        JValue result;
-        result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x()));
-        if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
-          instrumentation->MethodExitEvent(self, this_object_ref.get(),
-                                           shadow_frame.GetMethod(), inst->GetDexPc(insns),
-                                           result);
-        }
-        return result;
-      }
-      case Instruction::RETURN_OBJECT: {
-        PREAMBLE();
-        JValue result;
-        Object* obj_result = shadow_frame.GetVRegReference(inst->VRegA_11x());
-        result.SetJ(0);
-        result.SetL(obj_result);
-        if (do_assignability_check && obj_result != NULL) {
-          Class* return_type = MethodHelper(shadow_frame.GetMethod()).GetReturnType();
-          if (return_type == NULL) {
-            // Return the pending exception.
-            HANDLE_PENDING_EXCEPTION();
-          }
-          if (!obj_result->VerifierInstanceOf(return_type)) {
-            // This should never happen.
-            self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
-                                     "Ljava/lang/VirtualMachineError;",
-                                     "Returning '%s' that is not instance of return type '%s'",
-                                     ClassHelper(obj_result->GetClass()).GetDescriptor(),
-                                     ClassHelper(return_type).GetDescriptor());
-            HANDLE_PENDING_EXCEPTION();
-          }
-        }
-        if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
-          instrumentation->MethodExitEvent(self, this_object_ref.get(),
-                                           shadow_frame.GetMethod(), inst->GetDexPc(insns),
-                                           result);
-        }
-        return result;
-      }
-      case Instruction::CONST_4: {
-        PREAMBLE();
-        uint4_t dst = inst->VRegA_11n();
-        int4_t val = inst->VRegB_11n();
-        shadow_frame.SetVReg(dst, val);
-        if (val == 0) {
-          shadow_frame.SetVRegReference(dst, NULL);
-        }
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::CONST_16: {
-        PREAMBLE();
-        uint8_t dst = inst->VRegA_21s();
-        int16_t val = inst->VRegB_21s();
-        shadow_frame.SetVReg(dst, val);
-        if (val == 0) {
-          shadow_frame.SetVRegReference(dst, NULL);
-        }
-        inst = inst->Next_2xx();
-        break;
-      }
-      case Instruction::CONST: {
-        PREAMBLE();
-        uint8_t dst = inst->VRegA_31i();
-        int32_t val = inst->VRegB_31i();
-        shadow_frame.SetVReg(dst, val);
-        if (val == 0) {
-          shadow_frame.SetVRegReference(dst, NULL);
-        }
-        inst = inst->Next_3xx();
-        break;
-      }
-      case Instruction::CONST_HIGH16: {
-        PREAMBLE();
-        uint8_t dst = inst->VRegA_21h();
-        int32_t val = static_cast<int32_t>(inst->VRegB_21h() << 16);
-        shadow_frame.SetVReg(dst, val);
-        if (val == 0) {
-          shadow_frame.SetVRegReference(dst, NULL);
-        }
-        inst = inst->Next_2xx();
-        break;
-      }
-      case Instruction::CONST_WIDE_16:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_21s(), inst->VRegB_21s());
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::CONST_WIDE_32:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_31i(), inst->VRegB_31i());
-        inst = inst->Next_3xx();
-        break;
-      case Instruction::CONST_WIDE:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_51l(), inst->VRegB_51l());
-        inst = inst->Next_51l();
-        break;
-      case Instruction::CONST_WIDE_HIGH16:
-        shadow_frame.SetVRegLong(inst->VRegA_21h(),
-                                 static_cast<uint64_t>(inst->VRegB_21h()) << 48);
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::CONST_STRING: {
-        PREAMBLE();
-        String* s = ResolveString(self, mh,  inst->VRegB_21c());
-        if (UNLIKELY(s == NULL)) {
-          HANDLE_PENDING_EXCEPTION();
-        } else {
-          shadow_frame.SetVRegReference(inst->VRegA_21c(), s);
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::CONST_STRING_JUMBO: {
-        PREAMBLE();
-        String* s = ResolveString(self, mh,  inst->VRegB_31c());
-        if (UNLIKELY(s == NULL)) {
-          HANDLE_PENDING_EXCEPTION();
-        } else {
-          shadow_frame.SetVRegReference(inst->VRegA_31c(), s);
-          inst = inst->Next_3xx();
-        }
-        break;
-      }
-      case Instruction::CONST_CLASS: {
-        PREAMBLE();
-        Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame.GetMethod(),
-                                          self, false, do_access_check);
-        if (UNLIKELY(c == NULL)) {
-          HANDLE_PENDING_EXCEPTION();
-        } else {
-          shadow_frame.SetVRegReference(inst->VRegA_21c(), c);
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::MONITOR_ENTER: {
-        PREAMBLE();
-        Object* obj = shadow_frame.GetVRegReference(inst->VRegA_11x());
-        if (UNLIKELY(obj == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-        } else {
-          DoMonitorEnter(self, obj);
-          POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_1xx);
-        }
-        break;
-      }
-      case Instruction::MONITOR_EXIT: {
-        PREAMBLE();
-        Object* obj = shadow_frame.GetVRegReference(inst->VRegA_11x());
-        if (UNLIKELY(obj == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-        } else {
-          DoMonitorExit(self, obj);
-          POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_1xx);
-        }
-        break;
-      }
-      case Instruction::CHECK_CAST: {
-        PREAMBLE();
-        Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame.GetMethod(),
-                                          self, false, do_access_check);
-        if (UNLIKELY(c == NULL)) {
-          HANDLE_PENDING_EXCEPTION();
-        } else {
-          Object* obj = shadow_frame.GetVRegReference(inst->VRegA_21c());
-          if (UNLIKELY(obj != NULL && !obj->InstanceOf(c))) {
-            ThrowClassCastException(c, obj->GetClass());
-            HANDLE_PENDING_EXCEPTION();
-          } else {
-            inst = inst->Next_2xx();
-          }
-        }
-        break;
-      }
-      case Instruction::INSTANCE_OF: {
-        PREAMBLE();
-        Class* c = ResolveVerifyAndClinit(inst->VRegC_22c(), shadow_frame.GetMethod(),
-                                          self, false, do_access_check);
-        if (UNLIKELY(c == NULL)) {
-          HANDLE_PENDING_EXCEPTION();
-        } else {
-          Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c());
-          shadow_frame.SetVReg(inst->VRegA_22c(), (obj != NULL && obj->InstanceOf(c)) ? 1 : 0);
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::ARRAY_LENGTH:  {
-        PREAMBLE();
-        Object* array = shadow_frame.GetVRegReference(inst->VRegB_12x());
-        if (UNLIKELY(array == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-        } else {
-          shadow_frame.SetVReg(inst->VRegA_12x(), array->AsArray()->GetLength());
-          inst = inst->Next_1xx();
-        }
-        break;
-      }
-      case Instruction::NEW_INSTANCE: {
-        PREAMBLE();
-        Object* obj = AllocObjectFromCode(inst->VRegB_21c(), shadow_frame.GetMethod(),
-                                          self, do_access_check);
-        if (UNLIKELY(obj == NULL)) {
-          HANDLE_PENDING_EXCEPTION();
-        } else {
-          shadow_frame.SetVRegReference(inst->VRegA_21c(), obj);
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::NEW_ARRAY: {
-        PREAMBLE();
-        int32_t length = shadow_frame.GetVReg(inst->VRegB_22c());
-        Object* obj = AllocArrayFromCode(inst->VRegC_22c(), shadow_frame.GetMethod(),
-                                         length, self, do_access_check);
-        if (UNLIKELY(obj == NULL)) {
-          HANDLE_PENDING_EXCEPTION();
-        } else {
-          shadow_frame.SetVRegReference(inst->VRegA_22c(), obj);
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::FILLED_NEW_ARRAY: {
-        PREAMBLE();
-        bool success = DoFilledNewArray<false, do_access_check>(inst, shadow_frame,
-                                                                self, &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
-        break;
-      }
-      case Instruction::FILLED_NEW_ARRAY_RANGE: {
-        PREAMBLE();
-        bool success = DoFilledNewArray<true, do_access_check>(inst, shadow_frame,
-                                                               self, &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
-        break;
-      }
-      case Instruction::FILL_ARRAY_DATA: {
-        PREAMBLE();
-        Object* obj = shadow_frame.GetVRegReference(inst->VRegA_31t());
-        if (UNLIKELY(obj == NULL)) {
-          ThrowNullPointerException(NULL, "null array in FILL_ARRAY_DATA");
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        Array* array = obj->AsArray();
-        DCHECK(array->IsArrayInstance() && !array->IsObjectArray());
-        const uint16_t* payload_addr = reinterpret_cast<const uint16_t*>(inst) + inst->VRegB_31t();
-        const Instruction::ArrayDataPayload* payload =
-            reinterpret_cast<const Instruction::ArrayDataPayload*>(payload_addr);
-        if (UNLIKELY(static_cast<int32_t>(payload->element_count) > array->GetLength())) {
-          self->ThrowNewExceptionF(shadow_frame.GetCurrentLocationForThrow(),
-                                   "Ljava/lang/ArrayIndexOutOfBoundsException;",
-                                   "failed FILL_ARRAY_DATA; length=%d, index=%d",
-                                   array->GetLength(), payload->element_count);
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        uint32_t size_in_bytes = payload->element_count * payload->element_width;
-        memcpy(array->GetRawData(payload->element_width), payload->data, size_in_bytes);
-        inst = inst->Next_3xx();
-        break;
-      }
-      case Instruction::THROW: {
-        PREAMBLE();
-        Object* exception = shadow_frame.GetVRegReference(inst->VRegA_11x());
-        if (UNLIKELY(exception == NULL)) {
-          ThrowNullPointerException(NULL, "throw with null exception");
-        } else if (do_assignability_check && !exception->GetClass()->IsThrowableClass()) {
-          // This should never happen.
-          self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
-                                   "Ljava/lang/VirtualMachineError;",
-                                   "Throwing '%s' that is not instance of Throwable",
-                                   ClassHelper(exception->GetClass()).GetDescriptor());
-        } else {
-          self->SetException(shadow_frame.GetCurrentLocationForThrow(), exception->AsThrowable());
-        }
-        HANDLE_PENDING_EXCEPTION();
-        break;
-      }
-      case Instruction::GOTO: {
-        PREAMBLE();
-        inst = inst->RelativeAt(inst->VRegA_10t());
-        break;
-      }
-      case Instruction::GOTO_16: {
-        PREAMBLE();
-        inst = inst->RelativeAt(inst->VRegA_20t());
-        break;
-      }
-      case Instruction::GOTO_32: {
-        PREAMBLE();
-        inst = inst->RelativeAt(inst->VRegA_30t());
-        break;
-      }
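-      // packed-switch payload layout: [signature, size, first_key (2 units),
-      // then size 4-byte targets]; keys are contiguous, so the target is
-      // indexed directly by test_val - first_key.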
-      case Instruction::PACKED_SWITCH: {
-        PREAMBLE();
-        const uint16_t* switch_data = reinterpret_cast<const uint16_t*>(inst) + inst->VRegB_31t();
-        int32_t test_val = shadow_frame.GetVReg(inst->VRegA_31t());
-        DCHECK_EQ(switch_data[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
-        uint16_t size = switch_data[1];
-        DCHECK_GT(size, 0);
-        const int32_t* keys = reinterpret_cast<const int32_t*>(&switch_data[2]);
-        DCHECK(IsAligned<4>(keys));
-        int32_t first_key = keys[0];
-        const int32_t* targets = reinterpret_cast<const int32_t*>(&switch_data[4]);
-        DCHECK(IsAligned<4>(targets));
-        int32_t index = test_val - first_key;
-        if (index >= 0 && index < size) {
-          inst = inst->RelativeAt(targets[index]);
-        } else {
-          inst = inst->Next_3xx();
-        }
-        break;
-      }
-      case Instruction::SPARSE_SWITCH: {
-        PREAMBLE();
-        inst = DoSparseSwitch(inst, shadow_frame);
-        break;
-      }
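-      // Floating-point compares: the 'l' forms bias NaN toward -1 and the
-      // 'g' forms toward 1; NaN lands in the final else branch because every
-      // ordered comparison against it is false.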
-      case Instruction::CMPL_FLOAT: {
-        PREAMBLE();
-        float val1 = shadow_frame.GetVRegFloat(inst->VRegB_23x());
-        float val2 = shadow_frame.GetVRegFloat(inst->VRegC_23x());
-        int32_t result;
-        if (val1 > val2) {
-          result = 1;
-        } else if (val1 == val2) {
-          result = 0;
-        } else {
-          result = -1;
-        }
-        shadow_frame.SetVReg(inst->VRegA_23x(), result);
-        inst = inst->Next_2xx();
-        break;
-      }
-      case Instruction::CMPG_FLOAT: {
-        PREAMBLE();
-        float val1 = shadow_frame.GetVRegFloat(inst->VRegB_23x());
-        float val2 = shadow_frame.GetVRegFloat(inst->VRegC_23x());
-        int32_t result;
-        if (val1 < val2) {
-          result = -1;
-        } else if (val1 == val2) {
-          result = 0;
-        } else {
-          result = 1;
-        }
-        shadow_frame.SetVReg(inst->VRegA_23x(), result);
-        inst = inst->Next_2xx();
-        break;
-      }
-      case Instruction::CMPL_DOUBLE: {
-        PREAMBLE();
-        double val1 = shadow_frame.GetVRegDouble(inst->VRegB_23x());
-        double val2 = shadow_frame.GetVRegDouble(inst->VRegC_23x());
-        int32_t result;
-        if (val1 > val2) {
-          result = 1;
-        } else if (val1 == val2) {
-          result = 0;
-        } else {
-          result = -1;
-        }
-        shadow_frame.SetVReg(inst->VRegA_23x(), result);
-        inst = inst->Next_2xx();
-        break;
-      }
-      case Instruction::CMPG_DOUBLE: {
-        PREAMBLE();
-        double val1 = shadow_frame.GetVRegDouble(inst->VRegB_23x());
-        double val2 = shadow_frame.GetVRegDouble(inst->VRegC_23x());
-        int32_t result;
-        if (val1 < val2) {
-          result = -1;
-        } else if (val1 == val2) {
-          result = 0;
-        } else {
-          result = 1;
-        }
-        shadow_frame.SetVReg(inst->VRegA_23x(), result);
-        inst = inst->Next_2xx();
-        break;
-      }
-      case Instruction::CMP_LONG: {
-        PREAMBLE();
-        int64_t val1 = shadow_frame.GetVRegLong(inst->VRegB_23x());
-        int64_t val2 = shadow_frame.GetVRegLong(inst->VRegC_23x());
-        int32_t result;
-        if (val1 > val2) {
-          result = 1;
-        } else if (val1 == val2) {
-          result = 0;
-        } else {
-          result = -1;
-        }
-        shadow_frame.SetVReg(inst->VRegA_23x(), result);
-        inst = inst->Next_2xx();
-        break;
-      }
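-      // Conditional branches: a taken branch jumps via RelativeAt, otherwise
-      // execution falls through with Next_2xx; the *z forms compare a single
-      // vreg against zero.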
-      case Instruction::IF_EQ: {
-        PREAMBLE();
-        if (shadow_frame.GetVReg(inst->VRegA_22t()) == shadow_frame.GetVReg(inst->VRegB_22t())) {
-          inst = inst->RelativeAt(inst->VRegC_22t());
-        } else {
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::IF_NE: {
-        PREAMBLE();
-        if (shadow_frame.GetVReg(inst->VRegA_22t()) != shadow_frame.GetVReg(inst->VRegB_22t())) {
-          inst = inst->RelativeAt(inst->VRegC_22t());
-        } else {
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::IF_LT: {
-        PREAMBLE();
-        if (shadow_frame.GetVReg(inst->VRegA_22t()) < shadow_frame.GetVReg(inst->VRegB_22t())) {
-          inst = inst->RelativeAt(inst->VRegC_22t());
-        } else {
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::IF_GE: {
-        PREAMBLE();
-        if (shadow_frame.GetVReg(inst->VRegA_22t()) >= shadow_frame.GetVReg(inst->VRegB_22t())) {
-          inst = inst->RelativeAt(inst->VRegC_22t());
-        } else {
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::IF_GT: {
-        PREAMBLE();
-        if (shadow_frame.GetVReg(inst->VRegA_22t()) > shadow_frame.GetVReg(inst->VRegB_22t())) {
-          inst = inst->RelativeAt(inst->VRegC_22t());
-        } else {
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::IF_LE: {
-        PREAMBLE();
-        if (shadow_frame.GetVReg(inst->VRegA_22t()) <= shadow_frame.GetVReg(inst->VRegB_22t())) {
-          inst = inst->RelativeAt(inst->VRegC_22t());
-        } else {
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::IF_EQZ: {
-        PREAMBLE();
-        if (shadow_frame.GetVReg(inst->VRegA_21t()) == 0) {
-          inst = inst->RelativeAt(inst->VRegB_21t());
-        } else {
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::IF_NEZ: {
-        PREAMBLE();
-        if (shadow_frame.GetVReg(inst->VRegA_21t()) != 0) {
-          inst = inst->RelativeAt(inst->VRegB_21t());
-        } else {
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::IF_LTZ: {
-        PREAMBLE();
-        if (shadow_frame.GetVReg(inst->VRegA_21t()) < 0) {
-          inst = inst->RelativeAt(inst->VRegB_21t());
-        } else {
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::IF_GEZ: {
-        PREAMBLE();
-        if (shadow_frame.GetVReg(inst->VRegA_21t()) >= 0) {
-          inst = inst->RelativeAt(inst->VRegB_21t());
-        } else {
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::IF_GTZ: {
-        PREAMBLE();
-        if (shadow_frame.GetVReg(inst->VRegA_21t()) > 0) {
-          inst = inst->RelativeAt(inst->VRegB_21t());
-        } else {
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
-      case Instruction::IF_LEZ:  {
-        PREAMBLE();
-        if (shadow_frame.GetVReg(inst->VRegA_21t()) <= 0) {
-          inst = inst->RelativeAt(inst->VRegB_21t());
-        } else {
-          inst = inst->Next_2xx();
-        }
-        break;
-      }
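-      // Array reads: null-check the array reference, then let IsValidIndex
-      // bounds-check (it is expected to leave an
-      // ArrayIndexOutOfBoundsException pending on failure) before touching
-      // the backing data.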
-      case Instruction::AGET_BOOLEAN: {
-        PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
-        if (UNLIKELY(a == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        BooleanArray* array = a->AsBooleanArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(), array->GetData()[index]);
-          inst = inst->Next_2xx();
-        } else {
-          HANDLE_PENDING_EXCEPTION();
-        }
-        break;
-      }
-      case Instruction::AGET_BYTE: {
-        PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
-        if (UNLIKELY(a == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        ByteArray* array = a->AsByteArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(), array->GetData()[index]);
-          inst = inst->Next_2xx();
-        } else {
-          HANDLE_PENDING_EXCEPTION();
-        }
-        break;
-      }
-      case Instruction::AGET_CHAR: {
-        PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
-        if (UNLIKELY(a == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        CharArray* array = a->AsCharArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(), array->GetData()[index]);
-          inst = inst->Next_2xx();
-        } else {
-          HANDLE_PENDING_EXCEPTION();
-        }
-        break;
-      }
-      case Instruction::AGET_SHORT: {
-        PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
-        if (UNLIKELY(a == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        ShortArray* array = a->AsShortArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(), array->GetData()[index]);
-          inst = inst->Next_2xx();
-        } else {
-          HANDLE_PENDING_EXCEPTION();
-        }
-        break;
-      }
-      case Instruction::AGET: {
-        PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
-        if (UNLIKELY(a == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        IntArray* array = a->AsIntArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(), array->GetData()[index]);
-          inst = inst->Next_2xx();
-        } else {
-          HANDLE_PENDING_EXCEPTION();
-        }
-        break;
-      }
-      case Instruction::AGET_WIDE:  {
-        PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
-        if (UNLIKELY(a == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        LongArray* array = a->AsLongArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVRegLong(inst->VRegA_23x(), array->GetData()[index]);
-          inst = inst->Next_2xx();
-        } else {
-          HANDLE_PENDING_EXCEPTION();
-        }
-        break;
-      }
-      case Instruction::AGET_OBJECT: {
-        PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
-        if (UNLIKELY(a == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        ObjectArray<Object>* array = a->AsObjectArray<Object>();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVRegReference(inst->VRegA_23x(), array->GetWithoutChecks(index));
-          inst = inst->Next_2xx();
-        } else {
-          HANDLE_PENDING_EXCEPTION();
-        }
-        break;
-      }
-      case Instruction::APUT_BOOLEAN: {
-        PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
-        if (UNLIKELY(a == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        uint8_t val = shadow_frame.GetVReg(inst->VRegA_23x());
-        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        BooleanArray* array = a->AsBooleanArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
-          inst = inst->Next_2xx();
-        } else {
-          HANDLE_PENDING_EXCEPTION();
-        }
-        break;
-      }
-      case Instruction::APUT_BYTE: {
-        PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
-        if (UNLIKELY(a == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        int8_t val = shadow_frame.GetVReg(inst->VRegA_23x());
-        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        ByteArray* array = a->AsByteArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
-          inst = inst->Next_2xx();
-        } else {
-          HANDLE_PENDING_EXCEPTION();
-        }
-        break;
-      }
-      case Instruction::APUT_CHAR: {
-        PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
-        if (UNLIKELY(a == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        uint16_t val = shadow_frame.GetVReg(inst->VRegA_23x());
-        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        CharArray* array = a->AsCharArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
-          inst = inst->Next_2xx();
-        } else {
-          HANDLE_PENDING_EXCEPTION();
-        }
-        break;
-      }
-      case Instruction::APUT_SHORT: {
-        PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
-        if (UNLIKELY(a == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        int16_t val = shadow_frame.GetVReg(inst->VRegA_23x());
-        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        ShortArray* array = a->AsShortArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
-          inst = inst->Next_2xx();
-        } else {
-          HANDLE_PENDING_EXCEPTION();
-        }
-        break;
-      }
-      case Instruction::APUT: {
-        PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
-        if (UNLIKELY(a == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        int32_t val = shadow_frame.GetVReg(inst->VRegA_23x());
-        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        IntArray* array = a->AsIntArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
-          inst = inst->Next_2xx();
-        } else {
-          HANDLE_PENDING_EXCEPTION();
-        }
-        break;
-      }
-      case Instruction::APUT_WIDE: {
-        PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
-        if (UNLIKELY(a == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        int64_t val = shadow_frame.GetVRegLong(inst->VRegA_23x());
-        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        LongArray* array = a->AsLongArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
-          inst = inst->Next_2xx();
-        } else {
-          HANDLE_PENDING_EXCEPTION();
-        }
-        break;
-      }
-      case Instruction::APUT_OBJECT: {
-        PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
-        if (UNLIKELY(a == NULL)) {
-          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-          HANDLE_PENDING_EXCEPTION();
-          break;
-        }
-        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        Object* val = shadow_frame.GetVRegReference(inst->VRegA_23x());
-        ObjectArray<Object>* array = a->AsObjectArray<Object>();
-        if (LIKELY(array->IsValidIndex(index) && array->CheckAssignable(val))) {
-          array->SetWithoutChecks(index, val);
-          inst = inst->Next_2xx();
-        } else {
-          HANDLE_PENDING_EXCEPTION();
-        }
-        break;
-      }
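-      // Field accesses funnel through the DoFieldGet/DoFieldPut templates,
-      // specialized on access kind and primitive type; the _QUICK variants
-      // are quickened forms that carry a pre-resolved field offset instead
-      // of a field index.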
-      case Instruction::IGET_BOOLEAN: {
-        PREAMBLE();
-        bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimBoolean, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IGET_BYTE: {
-        PREAMBLE();
-        bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimByte, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IGET_CHAR: {
-        PREAMBLE();
-        bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimChar, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IGET_SHORT: {
-        PREAMBLE();
-        bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimShort, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IGET: {
-        PREAMBLE();
-        bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimInt, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IGET_WIDE: {
-        PREAMBLE();
-        bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimLong, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IGET_OBJECT: {
-        PREAMBLE();
-        bool success = DoFieldGet<InstanceObjectRead, Primitive::kPrimNot, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IGET_QUICK: {
-        PREAMBLE();
-        bool success = DoIGetQuick<Primitive::kPrimInt>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IGET_WIDE_QUICK: {
-        PREAMBLE();
-        bool success = DoIGetQuick<Primitive::kPrimLong>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IGET_OBJECT_QUICK: {
-        PREAMBLE();
-        bool success = DoIGetQuick<Primitive::kPrimNot>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SGET_BOOLEAN: {
-        PREAMBLE();
-        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimBoolean, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SGET_BYTE: {
-        PREAMBLE();
-        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimByte, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SGET_CHAR: {
-        PREAMBLE();
-        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimChar, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SGET_SHORT: {
-        PREAMBLE();
-        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimShort, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SGET: {
-        PREAMBLE();
-        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimInt, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SGET_WIDE: {
-        PREAMBLE();
-        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimLong, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SGET_OBJECT: {
-        PREAMBLE();
-        bool success = DoFieldGet<StaticObjectRead, Primitive::kPrimNot, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IPUT_BOOLEAN: {
-        PREAMBLE();
-        bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimBoolean, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IPUT_BYTE: {
-        PREAMBLE();
-        bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimByte, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IPUT_CHAR: {
-        PREAMBLE();
-        bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimChar, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IPUT_SHORT: {
-        PREAMBLE();
-        bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimShort, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IPUT: {
-        PREAMBLE();
-        bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimInt, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IPUT_WIDE: {
-        PREAMBLE();
-        bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimLong, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IPUT_OBJECT: {
-        PREAMBLE();
-        bool success = DoFieldPut<InstanceObjectWrite, Primitive::kPrimNot, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IPUT_QUICK: {
-        PREAMBLE();
-        bool success = DoIPutQuick<Primitive::kPrimInt>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IPUT_WIDE_QUICK: {
-        PREAMBLE();
-        bool success = DoIPutQuick<Primitive::kPrimLong>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::IPUT_OBJECT_QUICK: {
-        PREAMBLE();
-        bool success = DoIPutQuick<Primitive::kPrimNot>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SPUT_BOOLEAN: {
-        PREAMBLE();
-        bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimBoolean, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SPUT_BYTE: {
-        PREAMBLE();
-        bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimByte, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SPUT_CHAR: {
-        PREAMBLE();
-        bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimChar, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SPUT_SHORT: {
-        PREAMBLE();
-        bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimShort, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SPUT: {
-        PREAMBLE();
-        bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimInt, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SPUT_WIDE: {
-        PREAMBLE();
-        bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimLong, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SPUT_OBJECT: {
-        PREAMBLE();
-        bool success = DoFieldPut<StaticObjectWrite, Primitive::kPrimNot, do_access_check>(self, shadow_frame, inst);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
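-      // Invokes: DoInvoke is parameterized on the dispatch kind and on
-      // whether this is the /range form; the callee's result lands in
-      // result_register for a subsequent move-result.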
-      case Instruction::INVOKE_VIRTUAL: {
-        PREAMBLE();
-        bool success = DoInvoke<kVirtual, false, do_access_check>(self, shadow_frame, inst, &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
-        break;
-      }
-      case Instruction::INVOKE_VIRTUAL_RANGE: {
-        PREAMBLE();
-        bool success = DoInvoke<kVirtual, true, do_access_check>(self, shadow_frame, inst, &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
-        break;
-      }
-      case Instruction::INVOKE_SUPER: {
-        PREAMBLE();
-        bool success = DoInvoke<kSuper, false, do_access_check>(self, shadow_frame, inst, &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
-        break;
-      }
-      case Instruction::INVOKE_SUPER_RANGE: {
-        PREAMBLE();
-        bool success = DoInvoke<kSuper, true, do_access_check>(self, shadow_frame, inst, &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
-        break;
-      }
-      case Instruction::INVOKE_DIRECT: {
-        PREAMBLE();
-        bool success = DoInvoke<kDirect, false, do_access_check>(self, shadow_frame, inst, &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
-        break;
-      }
-      case Instruction::INVOKE_DIRECT_RANGE: {
-        PREAMBLE();
-        bool success = DoInvoke<kDirect, true, do_access_check>(self, shadow_frame, inst, &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
-        break;
-      }
-      case Instruction::INVOKE_INTERFACE: {
-        PREAMBLE();
-        bool success = DoInvoke<kInterface, false, do_access_check>(self, shadow_frame, inst, &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
-        break;
-      }
-      case Instruction::INVOKE_INTERFACE_RANGE: {
-        PREAMBLE();
-        bool success = DoInvoke<kInterface, true, do_access_check>(self, shadow_frame, inst, &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
-        break;
-      }
-      case Instruction::INVOKE_STATIC: {
-        PREAMBLE();
-        bool success = DoInvoke<kStatic, false, do_access_check>(self, shadow_frame, inst, &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
-        break;
-      }
-      case Instruction::INVOKE_STATIC_RANGE: {
-        PREAMBLE();
-        bool success = DoInvoke<kStatic, true, do_access_check>(self, shadow_frame, inst, &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
-        break;
-      }
-      case Instruction::INVOKE_VIRTUAL_QUICK: {
-        PREAMBLE();
-        bool success = DoInvokeVirtualQuick<false>(self, shadow_frame, inst, &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
-        break;
-      }
-      case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
-        PREAMBLE();
-        bool success = DoInvokeVirtualQuick<true>(self, shadow_frame, inst, &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
-        break;
-      }
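-      // Unary arithmetic and primitive conversions use the two-register 12x
-      // format and advance with Next_1xx.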
-      case Instruction::NEG_INT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_12x(), -shadow_frame.GetVReg(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::NOT_INT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_12x(), ~shadow_frame.GetVReg(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::NEG_LONG:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_12x(), -shadow_frame.GetVRegLong(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::NOT_LONG:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_12x(), ~shadow_frame.GetVRegLong(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::NEG_FLOAT:
-        PREAMBLE();
-        shadow_frame.SetVRegFloat(inst->VRegA_12x(), -shadow_frame.GetVRegFloat(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::NEG_DOUBLE:
-        PREAMBLE();
-        shadow_frame.SetVRegDouble(inst->VRegA_12x(), -shadow_frame.GetVRegDouble(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::INT_TO_LONG:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_12x(), shadow_frame.GetVReg(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::INT_TO_FLOAT:
-        PREAMBLE();
-        shadow_frame.SetVRegFloat(inst->VRegA_12x(), shadow_frame.GetVReg(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::INT_TO_DOUBLE:
-        PREAMBLE();
-        shadow_frame.SetVRegDouble(inst->VRegA_12x(), shadow_frame.GetVReg(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::LONG_TO_INT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_12x(), shadow_frame.GetVRegLong(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::LONG_TO_FLOAT:
-        PREAMBLE();
-        shadow_frame.SetVRegFloat(inst->VRegA_12x(), shadow_frame.GetVRegLong(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::LONG_TO_DOUBLE:
-        PREAMBLE();
-        shadow_frame.SetVRegDouble(inst->VRegA_12x(), shadow_frame.GetVRegLong(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
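-      // Narrowing float/double-to-integral conversions follow the Dalvik
-      // rules explicitly: NaN (val != val) maps to 0 and out-of-range values
-      // saturate to the type's min/max, rather than relying on the undefined
-      // behavior of a plain C++ cast.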
-      case Instruction::FLOAT_TO_INT: {
-        PREAMBLE();
-        float val = shadow_frame.GetVRegFloat(inst->VRegB_12x());
-        int32_t result;
-        if (val != val) {
-          result = 0;
-        } else if (val > static_cast<float>(kMaxInt)) {
-          result = kMaxInt;
-        } else if (val < static_cast<float>(kMinInt)) {
-          result = kMinInt;
-        } else {
-          result = val;
-        }
-        shadow_frame.SetVReg(inst->VRegA_12x(), result);
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::FLOAT_TO_LONG: {
-        PREAMBLE();
-        float val = shadow_frame.GetVRegFloat(inst->VRegB_12x());
-        int64_t result;
-        if (val != val) {
-          result = 0;
-        } else if (val > static_cast<float>(kMaxLong)) {
-          result = kMaxLong;
-        } else if (val < static_cast<float>(kMinLong)) {
-          result = kMinLong;
-        } else {
-          result = val;
-        }
-        shadow_frame.SetVRegLong(inst->VRegA_12x(), result);
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::FLOAT_TO_DOUBLE:
-        PREAMBLE();
-        shadow_frame.SetVRegDouble(inst->VRegA_12x(), shadow_frame.GetVRegFloat(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::DOUBLE_TO_INT: {
-        PREAMBLE();
-        double val = shadow_frame.GetVRegDouble(inst->VRegB_12x());
-        int32_t result;
-        if (val != val) {
-          result = 0;
-        } else if (val > static_cast<double>(kMaxInt)) {
-          result = kMaxInt;
-        } else if (val < static_cast<double>(kMinInt)) {
-          result = kMinInt;
-        } else {
-          result = val;
-        }
-        shadow_frame.SetVReg(inst->VRegA_12x(), result);
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::DOUBLE_TO_LONG: {
-        PREAMBLE();
-        double val = shadow_frame.GetVRegDouble(inst->VRegB_12x());
-        int64_t result;
-        if (val != val) {
-          result = 0;
-        } else if (val > static_cast<double>(kMaxLong)) {
-          result = kMaxLong;
-        } else if (val < static_cast<double>(kMinLong)) {
-          result = kMinLong;
-        } else {
-          result = val;
-        }
-        shadow_frame.SetVRegLong(inst->VRegA_12x(), result);
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::DOUBLE_TO_FLOAT:
-        PREAMBLE();
-        shadow_frame.SetVRegFloat(inst->VRegA_12x(), shadow_frame.GetVRegDouble(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::INT_TO_BYTE:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_12x(),
-                             static_cast<int8_t>(shadow_frame.GetVReg(inst->VRegB_12x())));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::INT_TO_CHAR:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_12x(),
-                             static_cast<uint16_t>(shadow_frame.GetVReg(inst->VRegB_12x())));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::INT_TO_SHORT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_12x(),
-                             static_cast<int16_t>(shadow_frame.GetVReg(inst->VRegB_12x())));
-        inst = inst->Next_1xx();
-        break;
-      case Instruction::ADD_INT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_23x(),
-                             shadow_frame.GetVReg(inst->VRegB_23x()) +
-                             shadow_frame.GetVReg(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::SUB_INT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_23x(),
-                             shadow_frame.GetVReg(inst->VRegB_23x()) -
-                             shadow_frame.GetVReg(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::MUL_INT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_23x(),
-                             shadow_frame.GetVReg(inst->VRegB_23x()) *
-                             shadow_frame.GetVReg(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::DIV_INT: {
-        PREAMBLE();
-        bool success = DoIntDivide(shadow_frame, inst->VRegA_23x(),
-                                   shadow_frame.GetVReg(inst->VRegB_23x()),
-                                   shadow_frame.GetVReg(inst->VRegC_23x()));
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::REM_INT: {
-        PREAMBLE();
-        bool success = DoIntRemainder(shadow_frame, inst->VRegA_23x(),
-                                      shadow_frame.GetVReg(inst->VRegB_23x()),
-                                      shadow_frame.GetVReg(inst->VRegC_23x()));
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::SHL_INT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_23x(),
-                             shadow_frame.GetVReg(inst->VRegB_23x()) <<
-                             (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x1f));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::SHR_INT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_23x(),
-                             shadow_frame.GetVReg(inst->VRegB_23x()) >>
-                             (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x1f));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::USHR_INT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_23x(),
-                             static_cast<uint32_t>(shadow_frame.GetVReg(inst->VRegB_23x())) >>
-                             (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x1f));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::AND_INT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_23x(),
-                             shadow_frame.GetVReg(inst->VRegB_23x()) &
-                             shadow_frame.GetVReg(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::OR_INT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_23x(),
-                             shadow_frame.GetVReg(inst->VRegB_23x()) |
-                             shadow_frame.GetVReg(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::XOR_INT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_23x(),
-                             shadow_frame.GetVReg(inst->VRegB_23x()) ^
-                             shadow_frame.GetVReg(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::ADD_LONG:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_23x(),
-                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) +
-                                 shadow_frame.GetVRegLong(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::SUB_LONG:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_23x(),
-                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) -
-                                 shadow_frame.GetVRegLong(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::MUL_LONG:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_23x(),
-                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) *
-                                 shadow_frame.GetVRegLong(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::DIV_LONG:
-        PREAMBLE();
-        DoLongDivide(shadow_frame, inst->VRegA_23x(),
-                     shadow_frame.GetVRegLong(inst->VRegB_23x()),
-                     shadow_frame.GetVRegLong(inst->VRegC_23x()));
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_2xx);
-        break;
-      case Instruction::REM_LONG:
-        PREAMBLE();
-        DoLongRemainder(shadow_frame, inst->VRegA_23x(),
-                        shadow_frame.GetVRegLong(inst->VRegB_23x()),
-                        shadow_frame.GetVRegLong(inst->VRegC_23x()));
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_2xx);
-        break;
-      case Instruction::AND_LONG:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_23x(),
-                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) &
-                                 shadow_frame.GetVRegLong(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::OR_LONG:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_23x(),
-                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) |
-                                 shadow_frame.GetVRegLong(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::XOR_LONG:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_23x(),
-                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) ^
-                                 shadow_frame.GetVRegLong(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::SHL_LONG:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_23x(),
-                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) <<
-                                 (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x3f));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::SHR_LONG:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_23x(),
-                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) >>
-                                 (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x3f));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::USHR_LONG:
-        PREAMBLE();
-        shadow_frame.SetVRegLong(inst->VRegA_23x(),
-                                 static_cast<uint64_t>(shadow_frame.GetVRegLong(inst->VRegB_23x())) >>
-                                 (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x3f));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::ADD_FLOAT:
-        PREAMBLE();
-        shadow_frame.SetVRegFloat(inst->VRegA_23x(),
-                                  shadow_frame.GetVRegFloat(inst->VRegB_23x()) +
-                                  shadow_frame.GetVRegFloat(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::SUB_FLOAT:
-        PREAMBLE();
-        shadow_frame.SetVRegFloat(inst->VRegA_23x(),
-                                  shadow_frame.GetVRegFloat(inst->VRegB_23x()) -
-                                  shadow_frame.GetVRegFloat(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::MUL_FLOAT:
-        PREAMBLE();
-        shadow_frame.SetVRegFloat(inst->VRegA_23x(),
-                                  shadow_frame.GetVRegFloat(inst->VRegB_23x()) *
-                                  shadow_frame.GetVRegFloat(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::DIV_FLOAT:
-        PREAMBLE();
-        shadow_frame.SetVRegFloat(inst->VRegA_23x(),
-                                  shadow_frame.GetVRegFloat(inst->VRegB_23x()) /
-                                  shadow_frame.GetVRegFloat(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::REM_FLOAT:
-        PREAMBLE();
-        shadow_frame.SetVRegFloat(inst->VRegA_23x(),
-                                  fmodf(shadow_frame.GetVRegFloat(inst->VRegB_23x()),
-                                        shadow_frame.GetVRegFloat(inst->VRegC_23x())));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::ADD_DOUBLE:
-        PREAMBLE();
-        shadow_frame.SetVRegDouble(inst->VRegA_23x(),
-                                   shadow_frame.GetVRegDouble(inst->VRegB_23x()) +
-                                   shadow_frame.GetVRegDouble(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::SUB_DOUBLE:
-        PREAMBLE();
-        shadow_frame.SetVRegDouble(inst->VRegA_23x(),
-                                   shadow_frame.GetVRegDouble(inst->VRegB_23x()) -
-                                   shadow_frame.GetVRegDouble(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::MUL_DOUBLE:
-        PREAMBLE();
-        shadow_frame.SetVRegDouble(inst->VRegA_23x(),
-                                   shadow_frame.GetVRegDouble(inst->VRegB_23x()) *
-                                   shadow_frame.GetVRegDouble(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::DIV_DOUBLE:
-        PREAMBLE();
-        shadow_frame.SetVRegDouble(inst->VRegA_23x(),
-                                   shadow_frame.GetVRegDouble(inst->VRegB_23x()) /
-                                   shadow_frame.GetVRegDouble(inst->VRegC_23x()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::REM_DOUBLE:
-        PREAMBLE();
-        shadow_frame.SetVRegDouble(inst->VRegA_23x(),
-                                   fmod(shadow_frame.GetVRegDouble(inst->VRegB_23x()),
-                                        shadow_frame.GetVRegDouble(inst->VRegC_23x())));
-        inst = inst->Next_2xx();
-        break;
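-      // The /2addr forms reuse vA as both destination and first operand, so
-      // the register number is read once into vregA before being overwritten.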
-      case Instruction::ADD_INT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVReg(vregA,
-                             shadow_frame.GetVReg(vregA) +
-                             shadow_frame.GetVReg(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::SUB_INT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVReg(vregA,
-                             shadow_frame.GetVReg(vregA) -
-                             shadow_frame.GetVReg(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::MUL_INT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVReg(vregA,
-                             shadow_frame.GetVReg(vregA) *
-                             shadow_frame.GetVReg(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::DIV_INT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        bool success = DoIntDivide(shadow_frame, vregA, shadow_frame.GetVReg(vregA),
-                                   shadow_frame.GetVReg(inst->VRegB_12x()));
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_1xx);
-        break;
-      }
-      case Instruction::REM_INT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        bool success = DoIntRemainder(shadow_frame, vregA, shadow_frame.GetVReg(vregA),
-                                      shadow_frame.GetVReg(inst->VRegB_12x()));
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_1xx);
-        break;
-      }
-      case Instruction::SHL_INT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVReg(vregA,
-                             shadow_frame.GetVReg(vregA) <<
-                             (shadow_frame.GetVReg(inst->VRegB_12x()) & 0x1f));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::SHR_INT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVReg(vregA,
-                             shadow_frame.GetVReg(vregA) >>
-                             (shadow_frame.GetVReg(inst->VRegB_12x()) & 0x1f));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::USHR_INT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVReg(vregA,
-                             static_cast<uint32_t>(shadow_frame.GetVReg(vregA)) >>
-                             (shadow_frame.GetVReg(inst->VRegB_12x()) & 0x1f));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::AND_INT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVReg(vregA,
-                             shadow_frame.GetVReg(vregA) &
-                             shadow_frame.GetVReg(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::OR_INT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVReg(vregA,
-                             shadow_frame.GetVReg(vregA) |
-                             shadow_frame.GetVReg(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::XOR_INT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVReg(vregA,
-                             shadow_frame.GetVReg(vregA) ^
-                             shadow_frame.GetVReg(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::ADD_LONG_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegLong(vregA,
-                                 shadow_frame.GetVRegLong(vregA) +
-                                 shadow_frame.GetVRegLong(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::SUB_LONG_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegLong(vregA,
-                                 shadow_frame.GetVRegLong(vregA) -
-                                 shadow_frame.GetVRegLong(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::MUL_LONG_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegLong(vregA,
-                                 shadow_frame.GetVRegLong(vregA) *
-                                 shadow_frame.GetVRegLong(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::DIV_LONG_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        DoLongDivide(shadow_frame, vregA, shadow_frame.GetVRegLong(vregA),
-                     shadow_frame.GetVRegLong(inst->VRegB_12x()));
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_1xx);
-        break;
-      }
-      case Instruction::REM_LONG_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        DoLongRemainder(shadow_frame, vregA, shadow_frame.GetVRegLong(vregA),
-                        shadow_frame.GetVRegLong(inst->VRegB_12x()));
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_1xx);
-        break;
-      }
-      case Instruction::AND_LONG_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegLong(vregA,
-                                 shadow_frame.GetVRegLong(vregA) &
-                                 shadow_frame.GetVRegLong(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::OR_LONG_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegLong(vregA,
-                                 shadow_frame.GetVRegLong(vregA) |
-                                 shadow_frame.GetVRegLong(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::XOR_LONG_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegLong(vregA,
-                                 shadow_frame.GetVRegLong(vregA) ^
-                                 shadow_frame.GetVRegLong(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::SHL_LONG_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegLong(vregA,
-                                 shadow_frame.GetVRegLong(vregA) <<
-                                 (shadow_frame.GetVReg(inst->VRegB_12x()) & 0x3f));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::SHR_LONG_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegLong(vregA,
-                                 shadow_frame.GetVRegLong(vregA) >>
-                                 (shadow_frame.GetVReg(inst->VRegB_12x()) & 0x3f));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::USHR_LONG_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegLong(vregA,
-                                 static_cast<uint64_t>(shadow_frame.GetVRegLong(vregA)) >>
-                                 (shadow_frame.GetVReg(inst->VRegB_12x()) & 0x3f));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::ADD_FLOAT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegFloat(vregA,
-                                  shadow_frame.GetVRegFloat(vregA) +
-                                  shadow_frame.GetVRegFloat(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::SUB_FLOAT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegFloat(vregA,
-                                  shadow_frame.GetVRegFloat(vregA) -
-                                  shadow_frame.GetVRegFloat(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::MUL_FLOAT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegFloat(vregA,
-                                  shadow_frame.GetVRegFloat(vregA) *
-                                  shadow_frame.GetVRegFloat(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::DIV_FLOAT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegFloat(vregA,
-                                  shadow_frame.GetVRegFloat(vregA) /
-                                  shadow_frame.GetVRegFloat(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::REM_FLOAT_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegFloat(vregA,
-                                  fmodf(shadow_frame.GetVRegFloat(vregA),
-                                        shadow_frame.GetVRegFloat(inst->VRegB_12x())));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::ADD_DOUBLE_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegDouble(vregA,
-                                   shadow_frame.GetVRegDouble(vregA) +
-                                   shadow_frame.GetVRegDouble(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::SUB_DOUBLE_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegDouble(vregA,
-                                   shadow_frame.GetVRegDouble(vregA) -
-                                   shadow_frame.GetVRegDouble(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::MUL_DOUBLE_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegDouble(vregA,
-                                   shadow_frame.GetVRegDouble(vregA) *
-                                   shadow_frame.GetVRegDouble(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::DIV_DOUBLE_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegDouble(vregA,
-                                   shadow_frame.GetVRegDouble(vregA) /
-                                   shadow_frame.GetVRegDouble(inst->VRegB_12x()));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::REM_DOUBLE_2ADDR: {
-        PREAMBLE();
-        uint4_t vregA = inst->VRegA_12x();
-        shadow_frame.SetVRegDouble(vregA,
-                                   fmod(shadow_frame.GetVRegDouble(vregA),
-                                        shadow_frame.GetVRegDouble(inst->VRegB_12x())));
-        inst = inst->Next_1xx();
-        break;
-      }
-      case Instruction::ADD_INT_LIT16:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22s(),
-                             shadow_frame.GetVReg(inst->VRegB_22s()) +
-                             inst->VRegC_22s());
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::RSUB_INT:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22s(),
-                             inst->VRegC_22s() -
-                             shadow_frame.GetVReg(inst->VRegB_22s()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::MUL_INT_LIT16:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22s(),
-                             shadow_frame.GetVReg(inst->VRegB_22s()) *
-                             inst->VRegC_22s());
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::DIV_INT_LIT16: {
-        PREAMBLE();
-        bool success = DoIntDivide(shadow_frame, inst->VRegA_22s(),
-                                   shadow_frame.GetVReg(inst->VRegB_22s()), inst->VRegC_22s());
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::REM_INT_LIT16: {
-        PREAMBLE();
-        bool success = DoIntRemainder(shadow_frame, inst->VRegA_22s(),
-                                      shadow_frame.GetVReg(inst->VRegB_22s()), inst->VRegC_22s());
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::AND_INT_LIT16:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22s(),
-                             shadow_frame.GetVReg(inst->VRegB_22s()) &
-                             inst->VRegC_22s());
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::OR_INT_LIT16:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22s(),
-                             shadow_frame.GetVReg(inst->VRegB_22s()) |
-                             inst->VRegC_22s());
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::XOR_INT_LIT16:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22s(),
-                             shadow_frame.GetVReg(inst->VRegB_22s()) ^
-                             inst->VRegC_22s());
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::ADD_INT_LIT8:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22b(),
-                             shadow_frame.GetVReg(inst->VRegB_22b()) +
-                             inst->VRegC_22b());
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::RSUB_INT_LIT8:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22b(),
-                             inst->VRegC_22b() -
-                             shadow_frame.GetVReg(inst->VRegB_22b()));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::MUL_INT_LIT8:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22b(),
-                             shadow_frame.GetVReg(inst->VRegB_22b()) *
-                             inst->VRegC_22b());
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::DIV_INT_LIT8: {
-        PREAMBLE();
-        bool success = DoIntDivide(shadow_frame, inst->VRegA_22b(),
-                                   shadow_frame.GetVReg(inst->VRegB_22b()), inst->VRegC_22b());
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::REM_INT_LIT8: {
-        PREAMBLE();
-        bool success = DoIntRemainder(shadow_frame, inst->VRegA_22b(),
-                                      shadow_frame.GetVReg(inst->VRegB_22b()), inst->VRegC_22b());
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::AND_INT_LIT8:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22b(),
-                             shadow_frame.GetVReg(inst->VRegB_22b()) &
-                             inst->VRegC_22b());
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::OR_INT_LIT8:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22b(),
-                             shadow_frame.GetVReg(inst->VRegB_22b()) |
-                             inst->VRegC_22b());
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::XOR_INT_LIT8:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22b(),
-                             shadow_frame.GetVReg(inst->VRegB_22b()) ^
-                             inst->VRegC_22b());
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::SHL_INT_LIT8:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22b(),
-                             shadow_frame.GetVReg(inst->VRegB_22b()) <<
-                             (inst->VRegC_22b() & 0x1f));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::SHR_INT_LIT8:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22b(),
-                             shadow_frame.GetVReg(inst->VRegB_22b()) >>
-                             (inst->VRegC_22b() & 0x1f));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::USHR_INT_LIT8:
-        PREAMBLE();
-        shadow_frame.SetVReg(inst->VRegA_22b(),
-                             static_cast<uint32_t>(shadow_frame.GetVReg(inst->VRegB_22b())) >>
-                             (inst->VRegC_22b() & 0x1f));
-        inst = inst->Next_2xx();
-        break;
-      case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
-      case Instruction::UNUSED_EB ... Instruction::UNUSED_FF:
-      case Instruction::UNUSED_79:
-      case Instruction::UNUSED_7A:
-        UnexpectedOpcode(inst, mh);
-    }
-  }
-}  // NOLINT(readability/fn_size)
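+// Selects which interpreter implementation Execute() dispatches to: the switch-based
+// interpreter by default, or the computed-goto one when set to kComputedGotoImplKind.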
+static const InterpreterImplKind kInterpreterImplKind = kSwitchImpl;
 
 static JValue Execute(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
                       ShadowFrame& shadow_frame, JValue result_register)
@@ -3114,12 +281,23 @@
          shadow_frame.GetMethod()->GetDeclaringClass()->IsProxyClass());
   DCHECK(!shadow_frame.GetMethod()->IsAbstract());
   DCHECK(!shadow_frame.GetMethod()->IsNative());
-  if (shadow_frame.GetMethod()->IsPreverified()) {
+
+  if (LIKELY(shadow_frame.GetMethod()->IsPreverified())) {
     // Enter the "without access check" interpreter.
-    return ExecuteImpl<false>(self, mh, code_item, shadow_frame, result_register);
+    if (kInterpreterImplKind == kSwitchImpl) {
+      return ExecuteSwitchImpl<false>(self, mh, code_item, shadow_frame, result_register);
+    } else {
+      DCHECK_EQ(kInterpreterImplKind, kComputedGotoImplKind);
+      return ExecuteGotoImpl<false>(self, mh, code_item, shadow_frame, result_register);
+    }
   } else {
     // Enter the "with access check" interpreter.
-    return ExecuteImpl<true>(self, mh, code_item, shadow_frame, result_register);
+    if (kInterpreterImplKind == kSwitchImpl) {
+      return ExecuteSwitchImpl<true>(self, mh, code_item, shadow_frame, result_register);
+    } else {
+      DCHECK_EQ(kInterpreterImplKind, kComputedGotoImplKind);
+      return ExecuteGotoImpl<true>(self, mh, code_item, shadow_frame, result_register);
+    }
   }
 }
 
@@ -3131,6 +309,7 @@
     return;
   }
 
+  const char* old_cause = self->StartAssertNoThreadSuspension("EnterInterpreterFromInvoke");
   MethodHelper mh(method);
   const DexFile::CodeItem* code_item = mh.GetCodeItem();
   uint16_t num_regs;
@@ -3139,6 +318,7 @@
     num_regs =  code_item->registers_size_;
     num_ins = code_item->ins_size_;
   } else if (method->IsAbstract()) {
+    self->EndAssertNoThreadSuspension(old_cause);
     ThrowAbstractMethodError(method);
     return;
   } else {
@@ -3154,6 +334,8 @@
   void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
   ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, last_shadow_frame, method, 0, memory));
   self->PushShadowFrame(shadow_frame);
+  self->EndAssertNoThreadSuspension(old_cause);
+
   size_t cur_reg = num_regs - num_ins;
   if (!method->IsStatic()) {
     CHECK(receiver != NULL);
@@ -3161,8 +343,7 @@
     ++cur_reg;
   } else if (UNLIKELY(!method->GetDeclaringClass()->IsInitializing())) {
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    if (UNLIKELY(!class_linker->EnsureInitialized(method->GetDeclaringClass(),
-                                                  true, true))) {
+    if (UNLIKELY(!class_linker->EnsureInitialized(method->GetDeclaringClass(), true, true))) {
       CHECK(self->IsExceptionPending());
       self->PopShadowFrame();
       return;
@@ -3243,17 +424,21 @@
     return;
   }
 
-  ArtMethod* method = shadow_frame->GetMethod();
-  if (method->IsStatic() && !method->GetDeclaringClass()->IsInitializing()) {
-    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(method->GetDeclaringClass(),
-                                                                 true, true)) {
-      DCHECK(Thread::Current()->IsExceptionPending());
-      return;
-    }
-    CHECK(method->GetDeclaringClass()->IsInitializing());
-  }
-
   self->PushShadowFrame(shadow_frame);
+  ArtMethod* method = shadow_frame->GetMethod();
+  // Ensure static methods are initialized.
+  if (method->IsStatic()) {
+    Class* declaringClass = method->GetDeclaringClass();
+    if (UNLIKELY(!declaringClass->IsInitializing())) {
+      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(declaringClass,
+                                                                            true, true))) {
+        DCHECK(Thread::Current()->IsExceptionPending());
+        self->PopShadowFrame();
+        return;
+      }
+      CHECK(declaringClass->IsInitializing());
+    }
+  }
 
   if (LIKELY(!method->IsNative())) {
     result->SetJ(Execute(self, mh, code_item, *shadow_frame, JValue()).GetJ());
@@ -3261,13 +446,12 @@
     // We don't expect to be asked to interpret native code (which is entered via a JNI compiler
     // generated stub) except during testing and image writing.
     CHECK(!Runtime::Current()->IsStarted());
-    Object* receiver = method->IsStatic() ? NULL : shadow_frame->GetVRegReference(0);
+    Object* receiver = method->IsStatic() ? nullptr : shadow_frame->GetVRegReference(0);
     uint32_t* args = shadow_frame->GetVRegArgs(method->IsStatic() ? 0 : 1);
     UnstartedRuntimeJni(self, method, receiver, args, result);
   }
 
   self->PopShadowFrame();
-  return;
 }
 
 }  // namespace interpreter
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
new file mode 100644
index 0000000..19f55d2
--- /dev/null
+++ b/runtime/interpreter/interpreter_common.cc
@@ -0,0 +1,348 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "interpreter_common.h"
+
+namespace art {
+namespace interpreter {
+
+static void UnstartedRuntimeInvoke(Thread* self, MethodHelper& mh,
+                                   const DexFile::CodeItem* code_item, ShadowFrame* shadow_frame,
+                                   JValue* result, size_t arg_offset)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+// Assign register 'src_reg' from shadow_frame to register 'dest_reg' into new_shadow_frame.
+static inline void AssignRegister(ShadowFrame& new_shadow_frame, const ShadowFrame& shadow_frame,
+                                  size_t dest_reg, size_t src_reg) {
+  // If both register locations contain the same value, the register probably holds a reference.
+  int32_t src_value = shadow_frame.GetVReg(src_reg);
+  mirror::Object* o = shadow_frame.GetVRegReference(src_reg);
+  if (src_value == reinterpret_cast<int32_t>(o)) {
+    new_shadow_frame.SetVRegReference(dest_reg, o);
+  } else {
+    new_shadow_frame.SetVReg(dest_reg, src_value);
+  }
+}
+
+template<bool is_range, bool do_assignability_check>
+bool DoCall(ArtMethod* method, Object* receiver, Thread* self, ShadowFrame& shadow_frame,
+            const Instruction* inst, uint16_t inst_data, JValue* result) {
+  // Compute method information.
+  MethodHelper mh(method);
+  const DexFile::CodeItem* code_item = mh.GetCodeItem();
+  const uint16_t num_ins = (is_range) ? inst->VRegA_3rc(inst_data) : inst->VRegA_35c(inst_data);
+  uint16_t num_regs;
+  if (LIKELY(code_item != NULL)) {
+    num_regs = code_item->registers_size_;
+    DCHECK_EQ(num_ins, code_item->ins_size_);
+  } else {
+    DCHECK(method->IsNative() || method->IsProxyMethod());
+    num_regs = num_ins;
+  }
+
+  // Allocate shadow frame on the stack.
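+  // Thread suspension is asserted off while arguments are copied in: the new frame is not
+  // yet pushed, so references stored into it would be invisible to a GC run mid-copy.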
+  const char* old_cause = self->StartAssertNoThreadSuspension("DoCall");
+  void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
+  ShadowFrame* new_shadow_frame(ShadowFrame::Create(num_regs, &shadow_frame, method, 0, memory));
+
+  // Initialize new shadow frame.
+  const size_t first_dest_reg = num_regs - num_ins;
+  if (do_assignability_check) {
+    // Slow path: we need to do runtime check on reference assignment. We need to load the shorty
+    // to get the exact type of each reference argument.
+    const DexFile::TypeList* params = mh.GetParameterTypeList();
+    const char* shorty = mh.GetShorty();
+
+    // Handle receiver apart since it's not part of the shorty.
+    size_t dest_reg = first_dest_reg;
+    size_t arg_offset = 0;
+    if (receiver != NULL) {
+      DCHECK(!method->IsStatic());
+      new_shadow_frame->SetVRegReference(dest_reg, receiver);
+      ++dest_reg;
+      ++arg_offset;
+    } else {
+      DCHECK(method->IsStatic());
+    }
+    // TODO: find a cleaner way to separate non-range and range information without duplicating code.
+    uint32_t arg[5];  // only used in invoke-XXX.
+    uint32_t vregC;   // only used in invoke-XXX-range.
+    if (is_range) {
+      vregC = inst->VRegC_3rc();
+    } else {
+      inst->GetArgs(arg, inst_data);
+    }
+    for (size_t shorty_pos = 0; dest_reg < num_regs; ++shorty_pos, ++dest_reg, ++arg_offset) {
+      DCHECK_LT(shorty_pos + 1, mh.GetShortyLength());
+      const size_t src_reg = (is_range) ? vregC + arg_offset : arg[arg_offset];
+      switch (shorty[shorty_pos + 1]) {
+        case 'L': {
+          Object* o = shadow_frame.GetVRegReference(src_reg);
+          if (do_assignability_check && o != NULL) {
+            Class* arg_type = mh.GetClassFromTypeIdx(params->GetTypeItem(shorty_pos).type_idx_);
+            if (arg_type == NULL) {
+              CHECK(self->IsExceptionPending());
+              self->EndAssertNoThreadSuspension(old_cause);
+              return false;
+            }
+            if (!o->VerifierInstanceOf(arg_type)) {
+              self->EndAssertNoThreadSuspension(old_cause);
+              // This should never happen.
+              self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
+                                       "Ljava/lang/VirtualMachineError;",
+                                       "Invoking %s with bad arg %d, type '%s' not instance of '%s'",
+                                       mh.GetName(), shorty_pos,
+                                       ClassHelper(o->GetClass()).GetDescriptor(),
+                                       ClassHelper(arg_type).GetDescriptor());
+              return false;
+            }
+          }
+          new_shadow_frame->SetVRegReference(dest_reg, o);
+          break;
+        }
+        case 'J': case 'D': {
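+          // Long ('J') and double ('D') arguments occupy two consecutive vregs;
+          // reassemble the 64-bit value, then consume the extra slot.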
+          uint64_t wide_value = (static_cast<uint64_t>(shadow_frame.GetVReg(src_reg + 1)) << 32) |
+                                static_cast<uint32_t>(shadow_frame.GetVReg(src_reg));
+          new_shadow_frame->SetVRegLong(dest_reg, wide_value);
+          ++dest_reg;
+          ++arg_offset;
+          break;
+        }
+        default:
+          new_shadow_frame->SetVReg(dest_reg, shadow_frame.GetVReg(src_reg));
+          break;
+      }
+    }
+  } else {
+    // Fast path: no extra checks.
+    if (is_range) {
+      const uint16_t first_src_reg = inst->VRegC_3rc();
+      for (size_t src_reg = first_src_reg, dest_reg = first_dest_reg; dest_reg < num_regs;
+          ++dest_reg, ++src_reg) {
+        AssignRegister(*new_shadow_frame, shadow_frame, dest_reg, src_reg);
+      }
+    } else {
+      DCHECK_LE(num_ins, 5U);
+      uint16_t regList = inst->Fetch16(2);
+      uint16_t count = num_ins;
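+      // In the 35c encoding the first four argument vregs are packed as 4-bit nibbles
+      // of regList (vC lowest); a fifth argument, if present, is in bits 11..8 of inst_data.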
+      if (count == 5) {
+        AssignRegister(*new_shadow_frame, shadow_frame, first_dest_reg + 4U, (inst_data >> 8) & 0x0f);
+        --count;
+      }
+      for (size_t arg_index = 0; arg_index < count; ++arg_index, regList >>= 4) {
+        AssignRegister(*new_shadow_frame, shadow_frame, first_dest_reg + arg_index, regList & 0x0f);
+      }
+    }
+  }
+  self->EndAssertNoThreadSuspension(old_cause);
+
+  // Do the call now.
+  if (LIKELY(Runtime::Current()->IsStarted())) {
+    (method->GetEntryPointFromInterpreter())(self, mh, code_item, new_shadow_frame, result);
+  } else {
+    UnstartedRuntimeInvoke(self, mh, code_item, new_shadow_frame, result, first_dest_reg);
+  }
+  return !self->IsExceptionPending();
+}
+
+template <bool is_range, bool do_access_check>
+bool DoFilledNewArray(const Instruction* inst, const ShadowFrame& shadow_frame,
+                      Thread* self, JValue* result) {
+  DCHECK(inst->Opcode() == Instruction::FILLED_NEW_ARRAY ||
+         inst->Opcode() == Instruction::FILLED_NEW_ARRAY_RANGE);
+  const int32_t length = is_range ? inst->VRegA_3rc() : inst->VRegA_35c();
+  if (!is_range) {
+    // Check that FILLED_NEW_ARRAY's argument count does not exceed 5.
+    CHECK_LE(length, 5);
+  }
+  if (UNLIKELY(length < 0)) {
+    ThrowNegativeArraySizeException(length);
+    return false;
+  }
+  uint16_t type_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
+  Class* arrayClass = ResolveVerifyAndClinit(type_idx, shadow_frame.GetMethod(),
+                                             self, false, do_access_check);
+  if (UNLIKELY(arrayClass == NULL)) {
+    DCHECK(self->IsExceptionPending());
+    return false;
+  }
+  CHECK(arrayClass->IsArrayClass());
+  Class* componentClass = arrayClass->GetComponentType();
+  if (UNLIKELY(componentClass->IsPrimitive() && !componentClass->IsPrimitiveInt())) {
+    if (componentClass->IsPrimitiveLong() || componentClass->IsPrimitiveDouble()) {
+      ThrowRuntimeException("Bad filled array request for type %s",
+                            PrettyDescriptor(componentClass).c_str());
+    } else {
+      self->ThrowNewExceptionF(shadow_frame.GetCurrentLocationForThrow(),
+                               "Ljava/lang/InternalError;",
+                               "Found type %s; filled-new-array not implemented for anything but \'int\'",
+                               PrettyDescriptor(componentClass).c_str());
+    }
+    return false;
+  }
+  Object* newArray = Array::Alloc(self, arrayClass, length);
+  if (UNLIKELY(newArray == NULL)) {
+    DCHECK(self->IsExceptionPending());
+    return false;
+  }
+  if (is_range) {
+    uint32_t vregC = inst->VRegC_3rc();
+    const bool is_primitive_int_component = componentClass->IsPrimitiveInt();
+    for (int32_t i = 0; i < length; ++i) {
+      if (is_primitive_int_component) {
+        newArray->AsIntArray()->Set(i, shadow_frame.GetVReg(vregC + i));
+      } else {
+        newArray->AsObjectArray<Object>()->Set(i, shadow_frame.GetVRegReference(vregC + i));
+      }
+    }
+  } else {
+    uint32_t arg[5];
+    inst->GetArgs(arg);
+    const bool is_primitive_int_component = componentClass->IsPrimitiveInt();
+    for (int32_t i = 0; i < length; ++i) {
+      if (is_primitive_int_component) {
+        newArray->AsIntArray()->Set(i, shadow_frame.GetVReg(arg[i]));
+      } else {
+        newArray->AsObjectArray<Object>()->Set(i, shadow_frame.GetVRegReference(arg[i]));
+      }
+    }
+  }
+
+  result->SetL(newArray);
+  return true;
+}
+
+static void UnstartedRuntimeInvoke(Thread* self, MethodHelper& mh,
+                                   const DexFile::CodeItem* code_item, ShadowFrame* shadow_frame,
+                                   JValue* result, size_t arg_offset) {
+  // In a runtime that's not started we intercept certain methods to avoid complicated dependency
+  // problems in core libraries.
+  std::string name(PrettyMethod(shadow_frame->GetMethod()));
+  if (name == "java.lang.Class java.lang.Class.forName(java.lang.String)") {
+    std::string descriptor(DotToDescriptor(shadow_frame->GetVRegReference(arg_offset)->AsString()->ToModifiedUtf8().c_str()));
+    ClassLoader* class_loader = NULL;  // shadow_frame.GetMethod()->GetDeclaringClass()->GetClassLoader();
+    Class* found = Runtime::Current()->GetClassLinker()->FindClass(descriptor.c_str(),
+                                                                   class_loader);
+    CHECK(found != NULL) << "Class.forName failed in un-started runtime for class: "
+        << PrettyDescriptor(descriptor);
+    result->SetL(found);
+  } else if (name == "java.lang.Object java.lang.Class.newInstance()") {
+    Class* klass = shadow_frame->GetVRegReference(arg_offset)->AsClass();
+    ArtMethod* c = klass->FindDeclaredDirectMethod("<init>", "()V");
+    CHECK(c != NULL);
+    SirtRef<Object> obj(self, klass->AllocObject(self));
+    CHECK(obj.get() != NULL);
+    EnterInterpreterFromInvoke(self, c, obj.get(), NULL, NULL);
+    result->SetL(obj.get());
+  } else if (name == "java.lang.reflect.Field java.lang.Class.getDeclaredField(java.lang.String)") {
+    // Special managed code cut-out to allow field lookup in an un-started runtime that would
+    // fail going the reflective Dex way.
+    Class* klass = shadow_frame->GetVRegReference(arg_offset)->AsClass();
+    String* name = shadow_frame->GetVRegReference(arg_offset + 1)->AsString();
+    ArtField* found = NULL;
+    FieldHelper fh;
+    ObjectArray<ArtField>* fields = klass->GetIFields();
+    for (int32_t i = 0; i < fields->GetLength() && found == NULL; ++i) {
+      ArtField* f = fields->Get(i);
+      fh.ChangeField(f);
+      if (name->Equals(fh.GetName())) {
+        found = f;
+      }
+    }
+    if (found == NULL) {
+      fields = klass->GetSFields();
+      for (int32_t i = 0; i < fields->GetLength() && found == NULL; ++i) {
+        ArtField* f = fields->Get(i);
+        fh.ChangeField(f);
+        if (name->Equals(fh.GetName())) {
+          found = f;
+        }
+      }
+    }
+    CHECK(found != NULL)
+      << "Failed to find field in Class.getDeclaredField in un-started runtime. name="
+      << name->ToModifiedUtf8() << " class=" << PrettyDescriptor(klass);
+    // TODO: getDeclaredField calls GetType once the field is found to ensure a
+    //       NoClassDefFoundError is thrown if the field's type cannot be resolved.
+    Class* jlr_Field = self->DecodeJObject(WellKnownClasses::java_lang_reflect_Field)->AsClass();
+    SirtRef<Object> field(self, jlr_Field->AllocObject(self));
+    CHECK(field.get() != NULL);
+    ArtMethod* c = jlr_Field->FindDeclaredDirectMethod("<init>", "(Ljava/lang/reflect/ArtField;)V");
+    uint32_t args[1];
+    args[0] = reinterpret_cast<uint32_t>(found);
+    EnterInterpreterFromInvoke(self, c, field.get(), args, NULL);
+    result->SetL(field.get());
+  } else if (name == "void java.lang.System.arraycopy(java.lang.Object, int, java.lang.Object, int, int)" ||
+             name == "void java.lang.System.arraycopy(char[], int, char[], int, int)") {
+    // Special case array copying without initializing System.
+    Class* ctype = shadow_frame->GetVRegReference(arg_offset)->GetClass()->GetComponentType();
+    jint srcPos = shadow_frame->GetVReg(arg_offset + 1);
+    jint dstPos = shadow_frame->GetVReg(arg_offset + 3);
+    jint length = shadow_frame->GetVReg(arg_offset + 4);
+    if (!ctype->IsPrimitive()) {
+      ObjectArray<Object>* src = shadow_frame->GetVRegReference(arg_offset)->AsObjectArray<Object>();
+      ObjectArray<Object>* dst = shadow_frame->GetVRegReference(arg_offset + 2)->AsObjectArray<Object>();
+      for (jint i = 0; i < length; ++i) {
+        dst->Set(dstPos + i, src->Get(srcPos + i));
+      }
+    } else if (ctype->IsPrimitiveChar()) {
+      CharArray* src = shadow_frame->GetVRegReference(arg_offset)->AsCharArray();
+      CharArray* dst = shadow_frame->GetVRegReference(arg_offset + 2)->AsCharArray();
+      for (jint i = 0; i < length; ++i) {
+        dst->Set(dstPos + i, src->Get(srcPos + i));
+      }
+    } else if (ctype->IsPrimitiveInt()) {
+      IntArray* src = shadow_frame->GetVRegReference(arg_offset)->AsIntArray();
+      IntArray* dst = shadow_frame->GetVRegReference(arg_offset + 2)->AsIntArray();
+      for (jint i = 0; i < length; ++i) {
+        dst->Set(dstPos + i, src->Get(srcPos + i));
+      }
+    } else {
+      UNIMPLEMENTED(FATAL) << "System.arraycopy of unexpected type: " << PrettyDescriptor(ctype);
+    }
+  } else {
+    // Not special, continue with regular interpreter execution.
+    artInterpreterToInterpreterBridge(self, mh, code_item, shadow_frame, result);
+  }
+}
+
+// Explicit DoCall template function declarations.
+#define EXPLICIT_DO_CALL_TEMPLATE_DECL(_is_range, _do_assignability_check)                      \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                          \
+  bool DoCall<_is_range, _do_assignability_check>(ArtMethod* method, Object* receiver,          \
+                                                  Thread* self, ShadowFrame& shadow_frame,      \
+                                                  const Instruction* inst, uint16_t inst_data,  \
+                                                  JValue* result)
+EXPLICIT_DO_CALL_TEMPLATE_DECL(false, false);
+EXPLICIT_DO_CALL_TEMPLATE_DECL(false, true);
+EXPLICIT_DO_CALL_TEMPLATE_DECL(true, false);
+EXPLICIT_DO_CALL_TEMPLATE_DECL(true, true);
+#undef EXPLICIT_DO_CALL_TEMPLATE_DECL
+
+// Explicit DoFilledNewArray template function declarations.
+#define EXPLICIT_DO_FILLED_NEW_ARRAY_TEMPLATE_DECL(_is_range_, _check)                \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                \
+  bool DoFilledNewArray<_is_range_, _check>(const Instruction* inst,                  \
+                                                     const ShadowFrame& shadow_frame, \
+                                                     Thread* self, JValue* result)
+EXPLICIT_DO_FILLED_NEW_ARRAY_TEMPLATE_DECL(false, false);
+EXPLICIT_DO_FILLED_NEW_ARRAY_TEMPLATE_DECL(false, true);
+EXPLICIT_DO_FILLED_NEW_ARRAY_TEMPLATE_DECL(true, false);
+EXPLICIT_DO_FILLED_NEW_ARRAY_TEMPLATE_DECL(true, true);
+#undef EXPLICIT_DO_FILLED_NEW_ARRAY_TEMPLATE_DECL
+
+}  // namespace interpreter
+}  // namespace art
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
new file mode 100644
index 0000000..29b00d2
--- /dev/null
+++ b/runtime/interpreter/interpreter_common.h
@@ -0,0 +1,680 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_COMMON_H_
+#define ART_RUNTIME_INTERPRETER_INTERPRETER_COMMON_H_
+
+#include "interpreter.h"
+
+#include <math.h>
+
+#include "base/logging.h"
+#include "class_linker-inl.h"
+#include "common_throws.h"
+#include "dex_file-inl.h"
+#include "dex_instruction-inl.h"
+#include "dex_instruction.h"
+#include "entrypoints/entrypoint_utils.h"
+#include "gc/accounting/card_table-inl.h"
+#include "invoke_arg_array_builder.h"
+#include "nth_caller_visitor.h"
+#include "mirror/art_field-inl.h"
+#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
+#include "mirror/class.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
+#include "object_utils.h"
+#include "ScopedLocalRef.h"
+#include "scoped_thread_state_change.h"
+#include "thread.h"
+#include "well_known_classes.h"
+
+using ::art::mirror::ArtField;
+using ::art::mirror::ArtMethod;
+using ::art::mirror::Array;
+using ::art::mirror::BooleanArray;
+using ::art::mirror::ByteArray;
+using ::art::mirror::CharArray;
+using ::art::mirror::Class;
+using ::art::mirror::ClassLoader;
+using ::art::mirror::IntArray;
+using ::art::mirror::LongArray;
+using ::art::mirror::Object;
+using ::art::mirror::ObjectArray;
+using ::art::mirror::ShortArray;
+using ::art::mirror::String;
+using ::art::mirror::Throwable;
+
+namespace art {
+namespace interpreter {
+
+// External references to both interpreter implementations.
+
+template<bool do_access_check>
+extern JValue ExecuteSwitchImpl(Thread* self, MethodHelper& mh,
+                                const DexFile::CodeItem* code_item,
+                                ShadowFrame& shadow_frame, JValue result_register);
+
+template<bool do_access_check>
+extern JValue ExecuteGotoImpl(Thread* self, MethodHelper& mh,
+                              const DexFile::CodeItem* code_item,
+                              ShadowFrame& shadow_frame, JValue result_register);
+
+static inline void DoMonitorEnter(Thread* self, Object* ref) NO_THREAD_SAFETY_ANALYSIS {
+  ref->MonitorEnter(self);
+}
+
+static inline void DoMonitorExit(Thread* self, Object* ref) NO_THREAD_SAFETY_ANALYSIS {
+  ref->MonitorExit(self);
+}
+
+// Invokes the given method. This is part of the invocation support and is used by DoInvoke and
+// DoInvokeVirtualQuick functions.
+// Returns true on success, otherwise throws an exception and returns false.
+template<bool is_range, bool do_assignability_check>
+bool DoCall(ArtMethod* method, Object* receiver, Thread* self, ShadowFrame& shadow_frame,
+            const Instruction* inst, uint16_t inst_data, JValue* result);
+
+
+// Handles invoke-XXX/range instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+template<InvokeType type, bool is_range, bool do_access_check>
+static inline bool DoInvoke(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
+                            uint16_t inst_data, JValue* result) {
+  const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
+  const uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
+  Object* receiver = (type == kStatic) ? nullptr : shadow_frame.GetVRegReference(vregC);
+  ArtMethod* const method = FindMethodFromCode(method_idx, receiver, shadow_frame.GetMethod(), self,
+                                               do_access_check, type);
+  if (type != kStatic) {
+    // Reload the vreg since the GC may have moved the object.
+    receiver = shadow_frame.GetVRegReference(vregC);
+  }
+
+  if (UNLIKELY(method == NULL)) {
+    CHECK(self->IsExceptionPending());
+    result->SetJ(0);
+    return false;
+  } else if (UNLIKELY(method->IsAbstract())) {
+    ThrowAbstractMethodError(method);
+    result->SetJ(0);
+    return false;
+  } else {
+    return DoCall<is_range, do_access_check>(method, receiver, self, shadow_frame, inst,
+                                             inst_data, result);
+  }
+}
+
+// Handles invoke-virtual-quick and invoke-virtual-quick-range instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+template<bool is_range>
+static inline bool DoInvokeVirtualQuick(Thread* self, ShadowFrame& shadow_frame,
+                                        const Instruction* inst, uint16_t inst_data,
+                                        JValue* result) {
+  const uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
+  Object* const receiver = shadow_frame.GetVRegReference(vregC);
+  if (UNLIKELY(receiver == NULL)) {
+    // We lost the reference to the method index so we cannot get a more
+    // precise exception message.
+    ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+    return false;
+  }
+  const uint32_t vtable_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
+  ArtMethod* const method = receiver->GetClass()->GetVTable()->GetWithoutChecks(vtable_idx);
+  if (UNLIKELY(method == NULL)) {
+    CHECK(self->IsExceptionPending());
+    result->SetJ(0);
+    return false;
+  } else if (UNLIKELY(method->IsAbstract())) {
+    ThrowAbstractMethodError(method);
+    result->SetJ(0);
+    return false;
+  } else {
+    // No need to check since we've been quickened.
+    return DoCall<is_range, false>(method, receiver, self, shadow_frame, inst, inst_data, result);
+  }
+}
+
+// Handles iget-XXX and sget-XXX instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
+static inline bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame,
+                              const Instruction* inst, uint16_t inst_data) {
+  bool is_static = (find_type == StaticObjectRead) || (find_type == StaticPrimitiveRead);
+  uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
+  ArtField* f = FindFieldFromCode(field_idx, shadow_frame.GetMethod(), self,
+                                  find_type, Primitive::FieldSize(field_type),
+                                  do_access_check);
+  if (UNLIKELY(f == NULL)) {
+    CHECK(self->IsExceptionPending());
+    return false;
+  }
+  Object* obj;
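+  // Static accesses read off the declaring class object; instance accesses read off the
+  // receiver held in vB, which must be non-null.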
+  if (is_static) {
+    obj = f->GetDeclaringClass();
+  } else {
+    obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+    if (UNLIKELY(obj == NULL)) {
+      ThrowNullPointerExceptionForFieldAccess(shadow_frame.GetCurrentLocationForThrow(), f, true);
+      return false;
+    }
+  }
+  uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      shadow_frame.SetVReg(vregA, f->GetBoolean(obj));
+      break;
+    case Primitive::kPrimByte:
+      shadow_frame.SetVReg(vregA, f->GetByte(obj));
+      break;
+    case Primitive::kPrimChar:
+      shadow_frame.SetVReg(vregA, f->GetChar(obj));
+      break;
+    case Primitive::kPrimShort:
+      shadow_frame.SetVReg(vregA, f->GetShort(obj));
+      break;
+    case Primitive::kPrimInt:
+      shadow_frame.SetVReg(vregA, f->GetInt(obj));
+      break;
+    case Primitive::kPrimLong:
+      shadow_frame.SetVRegLong(vregA, f->GetLong(obj));
+      break;
+    case Primitive::kPrimNot:
+      shadow_frame.SetVRegReference(vregA, f->GetObject(obj));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+  }
+  return true;
+}
+
+// Handles iget-quick, iget-wide-quick and iget-object-quick instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+template<Primitive::Type field_type>
+static inline bool DoIGetQuick(ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
+  Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+  if (UNLIKELY(obj == NULL)) {
+    // We lost the reference to the field index so we cannot get a more
+    // precise exception message.
+    ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+    return false;
+  }
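+  // For quickened field accesses, vC holds the field's byte offset directly rather than a
+  // field index, so no resolution is needed.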
+  MemberOffset field_offset(inst->VRegC_22c());
+  const bool is_volatile = false;  // iget-x-quick is only generated for non-volatile fields.
+  const uint32_t vregA = inst->VRegA_22c(inst_data);
+  switch (field_type) {
+    case Primitive::kPrimInt:
+      shadow_frame.SetVReg(vregA, static_cast<int32_t>(obj->GetField32(field_offset, is_volatile)));
+      break;
+    case Primitive::kPrimLong:
+      shadow_frame.SetVRegLong(vregA, static_cast<int64_t>(obj->GetField64(field_offset, is_volatile)));
+      break;
+    case Primitive::kPrimNot:
+      shadow_frame.SetVRegReference(vregA, obj->GetFieldObject<mirror::Object*>(field_offset, is_volatile));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+  }
+  return true;
+}
+
+// Handles iput-XXX and sput-XXX instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
+static inline bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame,
+                              const Instruction* inst, uint16_t inst_data) {
+  bool do_assignability_check = do_access_check;
+  bool is_static = (find_type == StaticObjectWrite) || (find_type == StaticPrimitiveWrite);
+  uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
+  ArtField* f = FindFieldFromCode(field_idx, shadow_frame.GetMethod(), self,
+                                  find_type, Primitive::FieldSize(field_type),
+                                  do_access_check);
+  if (UNLIKELY(f == NULL)) {
+    CHECK(self->IsExceptionPending());
+    return false;
+  }
+  Object* obj;
+  if (is_static) {
+    obj = f->GetDeclaringClass();
+  } else {
+    obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+    if (UNLIKELY(obj == NULL)) {
+      ThrowNullPointerExceptionForFieldAccess(shadow_frame.GetCurrentLocationForThrow(),
+                                              f, false);
+      return false;
+    }
+  }
+  uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      f->SetBoolean(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimByte:
+      f->SetByte(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimChar:
+      f->SetChar(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimShort:
+      f->SetShort(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimInt:
+      f->SetInt(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimLong:
+      f->SetLong(obj, shadow_frame.GetVRegLong(vregA));
+      break;
+    case Primitive::kPrimNot: {
+      Object* reg = shadow_frame.GetVRegReference(vregA);
+      if (do_assignability_check && reg != NULL) {
+        Class* field_class = FieldHelper(f).GetType();
+        if (!reg->VerifierInstanceOf(field_class)) {
+          // This should never happen.
+          self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
+                                   "Ljava/lang/VirtualMachineError;",
+                                   "Put '%s' that is not instance of field '%s' in '%s'",
+                                   ClassHelper(reg->GetClass()).GetDescriptor(),
+                                   ClassHelper(field_class).GetDescriptor(),
+                                   ClassHelper(f->GetDeclaringClass()).GetDescriptor());
+          return false;
+        }
+      }
+      f->SetObj(obj, reg);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+  }
+  return true;
+}
+
+// Handles iput-quick, iput-wide-quick and iput-object-quick instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+template<Primitive::Type field_type>
+static inline bool DoIPutQuick(const ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
+  Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+  if (UNLIKELY(obj == NULL)) {
+    // We lost the reference to the field index so we cannot get a more
+    // precise exception message.
+    ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+    return false;
+  }
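+  // As with iget-quick, vC holds the field's byte offset directly.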
+  MemberOffset field_offset(inst->VRegC_22c());
+  const bool is_volatile = false;  // iput-x-quick is only generated for non-volatile fields.
+  const uint32_t vregA = inst->VRegA_22c(inst_data);
+  switch (field_type) {
+    case Primitive::kPrimInt:
+      obj->SetField32(field_offset, shadow_frame.GetVReg(vregA), is_volatile);
+      break;
+    case Primitive::kPrimLong:
+      obj->SetField64(field_offset, shadow_frame.GetVRegLong(vregA), is_volatile);
+      break;
+    case Primitive::kPrimNot:
+      obj->SetFieldObject(field_offset, shadow_frame.GetVRegReference(vregA), is_volatile);
+      break;
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+  }
+  return true;
+}
+
+// Handles string resolution for const-string and const-string/jumbo instructions. Also ensures the
+// java.lang.String class is initialized.
+static inline String* ResolveString(Thread* self, MethodHelper& mh, uint32_t string_idx)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  Class* java_lang_string_class = String::GetJavaLangString();
+  if (UNLIKELY(!java_lang_string_class->IsInitialized())) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    if (UNLIKELY(!class_linker->EnsureInitialized(java_lang_string_class,
+                                                  true, true))) {
+      DCHECK(self->IsExceptionPending());
+      return NULL;
+    }
+  }
+  return mh.ResolveString(string_idx);
+}
+
+// Handles div-int, div-int/2addr, div-int/lit16 and div-int/lit8 instructions.
+// Returns true on success, otherwise throws a java.lang.ArithmeticException and returns false.
+static inline bool DoIntDivide(ShadowFrame& shadow_frame, size_t result_reg,
+                               int32_t dividend, int32_t divisor)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const int32_t kMinInt = std::numeric_limits<int32_t>::min();
+  if (UNLIKELY(divisor == 0)) {
+    ThrowArithmeticExceptionDivideByZero();
+    return false;
+  }
+  if (UNLIKELY(dividend == kMinInt && divisor == -1)) {
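+    // kMinInt / -1 overflows in C++; the result is defined to be kMinInt.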
+    shadow_frame.SetVReg(result_reg, kMinInt);
+  } else {
+    shadow_frame.SetVReg(result_reg, dividend / divisor);
+  }
+  return true;
+}
+
+// Handles rem-int, rem-int/2addr, rem-int/lit16 and rem-int/lit8 instructions.
+// Returns true on success, otherwise throws a java.lang.ArithmeticException and returns false.
+static inline bool DoIntRemainder(ShadowFrame& shadow_frame, size_t result_reg,
+                                  int32_t dividend, int32_t divisor)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const int32_t kMinInt = std::numeric_limits<int32_t>::min();
+  if (UNLIKELY(divisor == 0)) {
+    ThrowArithmeticExceptionDivideByZero();
+    return false;
+  }
+  if (UNLIKELY(dividend == kMinInt && divisor == -1)) {
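+    // kMinInt % -1 would overflow in C++; the result is defined to be 0.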
+    shadow_frame.SetVReg(result_reg, 0);
+  } else {
+    shadow_frame.SetVReg(result_reg, dividend % divisor);
+  }
+  return true;
+}
+
+// Handles div-long and div-long-2addr instructions.
+// Returns true on success, otherwise throws a java.lang.ArithmeticException and returns false.
+static inline bool DoLongDivide(ShadowFrame& shadow_frame, size_t result_reg,
+                                int64_t dividend, int64_t divisor)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const int64_t kMinLong = std::numeric_limits<int64_t>::min();
+  if (UNLIKELY(divisor == 0)) {
+    ThrowArithmeticExceptionDivideByZero();
+    return false;
+  }
+  if (UNLIKELY(dividend == kMinLong && divisor == -1)) {
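+    // kMinLong / -1 overflows in C++; the result is defined to be kMinLong.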
+    shadow_frame.SetVRegLong(result_reg, kMinLong);
+  } else {
+    shadow_frame.SetVRegLong(result_reg, dividend / divisor);
+  }
+  return true;
+}
+
+// Handles rem-long and rem-long-2addr instructions.
+// Returns true on success, otherwise throws a java.lang.ArithmeticException and returns false.
+static inline bool DoLongRemainder(ShadowFrame& shadow_frame, size_t result_reg,
+                                   int64_t dividend, int64_t divisor)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const int64_t kMinLong = std::numeric_limits<int64_t>::min();
+  if (UNLIKELY(divisor == 0)) {
+    ThrowArithmeticExceptionDivideByZero();
+    return false;
+  }
+  if (UNLIKELY(dividend == kMinLong && divisor == -1)) {
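+    // kMinLong % -1 would overflow in C++; the result is defined to be 0.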
+    shadow_frame.SetVRegLong(result_reg, 0);
+  } else {
+    shadow_frame.SetVRegLong(result_reg, dividend % divisor);
+  }
+  return true;
+}
+
+// Handles filled-new-array and filled-new-array-range instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+template <bool is_range, bool do_access_check>
+bool DoFilledNewArray(const Instruction* inst, const ShadowFrame& shadow_frame,
+                      Thread* self, JValue* result);
+
+// Handles packed-switch instruction.
+// Returns the branch offset to the next instruction to execute.
+static inline int32_t DoPackedSwitch(const Instruction* inst, const ShadowFrame& shadow_frame,
+                                     uint16_t inst_data)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  DCHECK(inst->Opcode() == Instruction::PACKED_SWITCH);
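+  // Packed-switch payload layout: uint16_t ident, uint16_t size, int32_t first_key,
+  // then size int32_t relative branch targets.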
+  const uint16_t* switch_data = reinterpret_cast<const uint16_t*>(inst) + inst->VRegB_31t();
+  int32_t test_val = shadow_frame.GetVReg(inst->VRegA_31t(inst_data));
+  DCHECK_EQ(switch_data[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
+  uint16_t size = switch_data[1];
+  DCHECK_GT(size, 0);
+  const int32_t* keys = reinterpret_cast<const int32_t*>(&switch_data[2]);
+  DCHECK(IsAligned<4>(keys));
+  int32_t first_key = keys[0];
+  const int32_t* targets = reinterpret_cast<const int32_t*>(&switch_data[4]);
+  DCHECK(IsAligned<4>(targets));
+  int32_t index = test_val - first_key;
+  if (index >= 0 && index < size) {
+    return targets[index];
+  } else {
+    // No corresponding value: move forward by 3 (size of PACKED_SWITCH).
+    return 3;
+  }
+}
+
+// Handles sparse-switch instruction.
+// Returns the branch offset to the next instruction to execute.
+static inline int32_t DoSparseSwitch(const Instruction* inst, const ShadowFrame& shadow_frame,
+                                     uint16_t inst_data)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  DCHECK(inst->Opcode() == Instruction::SPARSE_SWITCH);
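+  // Sparse-switch payload layout: uint16_t ident, uint16_t size, size sorted int32_t keys,
+  // then size int32_t relative branch targets; the keys are binary-searched for test_val.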
+  const uint16_t* switch_data = reinterpret_cast<const uint16_t*>(inst) + inst->VRegB_31t();
+  int32_t test_val = shadow_frame.GetVReg(inst->VRegA_31t(inst_data));
+  DCHECK_EQ(switch_data[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
+  uint16_t size = switch_data[1];
+  DCHECK_GT(size, 0);
+  const int32_t* keys = reinterpret_cast<const int32_t*>(&switch_data[2]);
+  DCHECK(IsAligned<4>(keys));
+  const int32_t* entries = keys + size;
+  DCHECK(IsAligned<4>(entries));
+  int lo = 0;
+  int hi = size - 1;
+  while (lo <= hi) {
+    int mid = (lo + hi) / 2;
+    int32_t foundVal = keys[mid];
+    if (test_val < foundVal) {
+      hi = mid - 1;
+    } else if (test_val > foundVal) {
+      lo = mid + 1;
+    } else {
+      return entries[mid];
+    }
+  }
+  // No corresponding value: move forward by 3 (size of SPARSE_SWITCH).
+  return 3;
+}
+
+static inline uint32_t FindNextInstructionFollowingException(Thread* self,
+                                                             ShadowFrame& shadow_frame,
+                                                             uint32_t dex_pc,
+                                                             mirror::Object* this_object,
+                                                             const instrumentation::Instrumentation* instrumentation)
+    ALWAYS_INLINE;
+
+static inline uint32_t FindNextInstructionFollowingException(Thread* self,
+                                                             ShadowFrame& shadow_frame,
+                                                             uint32_t dex_pc,
+                                                             mirror::Object* this_object,
+                                                             const instrumentation::Instrumentation* instrumentation)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  self->VerifyStack();
+  ThrowLocation throw_location;
+  mirror::Throwable* exception = self->GetException(&throw_location);
+  bool clear_exception = false;
+  uint32_t found_dex_pc = shadow_frame.GetMethod()->FindCatchBlock(exception->GetClass(), dex_pc,
+                                                                   &clear_exception);
+  if (found_dex_pc == DexFile::kDexNoIndex) {
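+    // No catch handler in this method: notify instrumentation that the frame is being
+    // unwound; the interpreter loop will return and let the exception propagate.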
+    instrumentation->MethodUnwindEvent(self, this_object,
+                                       shadow_frame.GetMethod(), dex_pc);
+  } else {
+    instrumentation->ExceptionCaughtEvent(self, throw_location,
+                                          shadow_frame.GetMethod(),
+                                          found_dex_pc, exception);
+    if (clear_exception) {
+      self->ClearException();
+    }
+  }
+  return found_dex_pc;
+}
+
+static void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh)
+  __attribute__((cold, noreturn, noinline));
+
+static void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  LOG(FATAL) << "Unexpected instruction: " << inst->DumpString(&mh.GetDexFile());
+  exit(0);  // Unreachable, keep GCC happy.
+}
+
+static inline void TraceExecution(const ShadowFrame& shadow_frame, const Instruction* inst,
+                                  const uint32_t dex_pc, MethodHelper& mh)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  constexpr bool kTracing = false;
+  if (kTracing) {
+#define TRACE_LOG std::cerr
+    std::ostringstream oss;
+    oss << PrettyMethod(shadow_frame.GetMethod())
+        << StringPrintf("\n0x%x: ", dex_pc)
+        << inst->DumpString(&mh.GetDexFile()) << "\n";
+    for (size_t i = 0; i < shadow_frame.NumberOfVRegs(); ++i) {
+      uint32_t raw_value = shadow_frame.GetVReg(i);
+      Object* ref_value = shadow_frame.GetVRegReference(i);
+      oss << StringPrintf(" vreg%d=0x%08X", i, raw_value);
+      if (ref_value != NULL) {
+        if (ref_value->GetClass()->IsStringClass() &&
+            ref_value->AsString()->GetCharArray() != NULL) {
+          oss << "/java.lang.String \"" << ref_value->AsString()->ToModifiedUtf8() << "\"";
+        } else {
+          oss << "/" << PrettyTypeOf(ref_value);
+        }
+      }
+    }
+    TRACE_LOG << oss.str() << "\n";
+#undef TRACE_LOG
+  }
+}
+
+static inline bool IsBackwardBranch(int32_t branch_offset) {
+  return branch_offset <= 0;
+}
+
+// Explicitly instantiate all DoInvoke functions.
+#define EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, _is_range, _do_check)                             \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                              \
+  static bool DoInvoke<_type, _is_range, _do_check>(Thread* self, ShadowFrame& shadow_frame,      \
+                                                    const Instruction* inst, uint16_t inst_data,  \
+                                                    JValue* result)
+
+#define EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(_type)       \
+  EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, false, false);  \
+  EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, false, true);   \
+  EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, true, false);   \
+  EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, true, true);
+
+EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kStatic);      // invoke-static/range.
+EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kDirect);      // invoke-direct/range.
+EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kVirtual);     // invoke-virtual/range.
+EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kSuper);       // invoke-super/range.
+EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kInterface);   // invoke-interface/range.
+#undef EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL
+#undef EXPLICIT_DO_INVOKE_TEMPLATE_DECL
+
+// Explicitly instantiate all DoFieldGet functions.
+#define EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, _do_check)                       \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                                  \
+  static bool DoFieldGet<_find_type, _field_type, _do_check>(Thread* self, ShadowFrame& shadow_frame, \
+                                                             const Instruction* inst, uint16_t inst_data)
+
+#define EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(_find_type, _field_type)  \
+    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, false);  \
+    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, true);
+
+// iget-XXX
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimBoolean);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimByte);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimChar);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimShort);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimInt);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimLong);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstanceObjectRead, Primitive::kPrimNot);
+
+// sget-XXX
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimBoolean);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimByte);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimChar);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimShort);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimInt);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimLong);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticObjectRead, Primitive::kPrimNot);
+
+#undef EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL
+#undef EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL
+
+// Explicitly instantiate all DoFieldPut functions.
+#define EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, _do_check)                             \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                                        \
+  static bool DoFieldPut<_find_type, _field_type, _do_check>(Thread* self, const ShadowFrame& shadow_frame, \
+                                                             const Instruction* inst, uint16_t inst_data)
+
+#define EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(_find_type, _field_type)  \
+    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, false);  \
+    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, true);
+
+// iput-XXX
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimBoolean);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimByte);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimChar);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimShort);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimInt);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimLong);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstanceObjectWrite, Primitive::kPrimNot);
+
+// sput-XXX
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimBoolean);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimByte);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimChar);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimShort);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimInt);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimLong);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticObjectWrite, Primitive::kPrimNot);
+
+#undef EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL
+#undef EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL
+
+// Explicitly instantiate all DoInvokeVirtualQuick functions.
+#define EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(_is_range)                       \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                    \
+  static bool DoInvokeVirtualQuick<_is_range>(Thread* self, ShadowFrame& shadow_frame,  \
+                                          const Instruction* inst, uint16_t inst_data,  \
+                                          JValue* result)
+
+EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(false);  // invoke-virtual-quick.
+EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(true);   // invoke-virtual-quick-range.
+#undef EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL
+
+// Explicitly instantiate all DoIGetQuick functions.
+#define EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(_field_type)                                   \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                        \
+  static bool DoIGetQuick<_field_type>(ShadowFrame& shadow_frame, const Instruction* inst,  \
+                                       uint16_t inst_data)
+
+EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimInt);    // iget-quick.
+EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimLong);   // iget-wide-quick.
+EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimNot);    // iget-object-quick.
+#undef EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL
+
+// Explicitly instantiate all DoIPutQuick functions.
+#define EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type)                                         \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                              \
+  static bool DoIPutQuick<_field_type>(const ShadowFrame& shadow_frame, const Instruction* inst,  \
+                                       uint16_t inst_data)
+
+EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(Primitive::kPrimInt);    // iput-quick.
+EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(Primitive::kPrimLong);   // iput-wide-quick.
+EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(Primitive::kPrimNot);    // iput-object-quick.
+#undef EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL
+
+}  // namespace interpreter
+}  // namespace art
+
+#endif  // ART_RUNTIME_INTERPRETER_INTERPRETER_COMMON_H_
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
new file mode 100644
index 0000000..3a91b8c
--- /dev/null
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -0,0 +1,2346 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "interpreter_common.h"
+
+namespace art {
+namespace interpreter {
+
+// In the following macros, we expect the following local variables to exist:
+// - "self": the current Thread*.
+// - "inst": the current Instruction*.
+// - "inst_data": the current instruction's first 16 bits.
+// - "dex_pc": the current pc.
+// - "shadow_frame": the current shadow frame.
+// - "mh": the current MethodHelper.
+// - "currentHandlersTable": the current table of pointers to the instruction handlers.
+
+// Advances to the next instruction and updates the interpreter state.
+#define ADVANCE(_offset)                                                    \
+  do {                                                                      \
+    int32_t disp = static_cast<int32_t>(_offset);                           \
+    inst = inst->RelativeAt(disp);                                          \
+    dex_pc = static_cast<uint32_t>(static_cast<int32_t>(dex_pc) + disp);    \
+    shadow_frame.SetDexPC(dex_pc);                                          \
+    TraceExecution(shadow_frame, inst, dex_pc, mh);                         \
+    inst_data = inst->Fetch16(0);                                           \
+    goto *currentHandlersTable[inst->Opcode(inst_data)];                    \
+  } while (false)
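+// Note that ADVANCE dispatches with a computed goto: the new instruction's opcode indexes
+// currentHandlersTable, whose entries are the &&op_XXX label addresses taken below, so
+// control threads directly from one handler to the next without a central dispatch loop.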
+
+#define HANDLE_PENDING_EXCEPTION() goto exception_pending_label
+
+#define POSSIBLY_HANDLE_PENDING_EXCEPTION(_is_exception_pending, _offset)   \
+  do {                                                                      \
+    if (UNLIKELY(_is_exception_pending)) {                                  \
+      HANDLE_PENDING_EXCEPTION();                                           \
+    } else {                                                                \
+      ADVANCE(_offset);                                                     \
+    }                                                                       \
+  } while (false)
+
+#define UPDATE_HANDLER_TABLE()                              \
+  do {                                                      \
+    if (UNLIKELY(instrumentation->HasDexPcListeners())) {   \
+      currentHandlersTable = instrumentationHandlersTable;  \
+    } else {                                                \
+      currentHandlersTable = handlersTable;                 \
+    }                                                       \
+  } while (false)
+
+#define UNREACHABLE_CODE_CHECK()                \
+  do {                                          \
+    if (kIsDebugBuild) {                        \
+      LOG(FATAL) << "We should not be here!";   \
+    }                                           \
+  } while (false)
+
+#define HANDLE_INSTRUCTION_START(opcode) op_##opcode:  // NOLINT(whitespace/labels)
+#define HANDLE_INSTRUCTION_END() UNREACHABLE_CODE_CHECK()
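+// With these macros, a handler such as
+//   HANDLE_INSTRUCTION_START(NOP)
+//     ADVANCE(1);
+//   HANDLE_INSTRUCTION_END();
+// expands to a label "op_NOP:" whose body ends in a goto to the next handler; the
+// UNREACHABLE_CODE_CHECK() after it only fires if a handler forgot to dispatch.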
+
+template<bool do_access_check>
+JValue ExecuteGotoImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
+                       ShadowFrame& shadow_frame, JValue result_register) {
+  bool do_assignability_check = do_access_check;
+  if (UNLIKELY(!shadow_frame.HasReferenceArray())) {
+    LOG(FATAL) << "Invalid shadow frame for interpreter use";
+    return JValue();
+  }
+  self->VerifyStack();
+
+  uint32_t dex_pc = shadow_frame.GetDexPC();
+  const instrumentation::Instrumentation* const instrumentation = Runtime::Current()->GetInstrumentation();
+  if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing.
+    if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
+      instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                        shadow_frame.GetMethod(), 0);
+    }
+  }
+  const Instruction* inst = Instruction::At(code_item->insns_ + dex_pc);
+  uint16_t inst_data;
+
+  // Define the handler tables.
+  static const void* handlersTable[kNumPackedOpcodes] = {
+#define INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v) &&op_##code,
+#include "dex_instruction_list.h"
+      DEX_INSTRUCTION_LIST(INSTRUCTION_HANDLER)
+#undef DEX_INSTRUCTION_LIST
+#undef INSTRUCTION_HANDLER
+  };
+
+  static const void* instrumentationHandlersTable[kNumPackedOpcodes] = {
+#define INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v) &&instrumentation_op_##code,
+#include "dex_instruction_list.h"
+      DEX_INSTRUCTION_LIST(INSTRUCTION_HANDLER)
+#undef DEX_INSTRUCTION_LIST
+#undef INSTRUCTION_HANDLER
+  };
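+  // Two parallel tables are kept so UPDATE_HANDLER_TABLE() can cheaply redirect all
+  // dispatches to the instrumentation_op_XXX handlers while dex pc listeners are
+  // installed, and back to the plain op_XXX handlers once they are removed.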
+
+  const void** currentHandlersTable;
+  UPDATE_HANDLER_TABLE();
+
+  // Jump to first instruction.
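+  // Note that ADVANCE(0) dispatches to the instruction at the current dex pc, which is
+  // nonzero when re-entering the method after deoptimization.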
+  ADVANCE(0);
+  UNREACHABLE_CODE_CHECK();
+
+  HANDLE_INSTRUCTION_START(NOP)
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MOVE)
+    shadow_frame.SetVReg(inst->VRegA_12x(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MOVE_FROM16)
+    shadow_frame.SetVReg(inst->VRegA_22x(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_22x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MOVE_16)
+    shadow_frame.SetVReg(inst->VRegA_32x(),
+                         shadow_frame.GetVReg(inst->VRegB_32x()));
+    ADVANCE(3);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MOVE_WIDE)
+    shadow_frame.SetVRegLong(inst->VRegA_12x(inst_data),
+                             shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MOVE_WIDE_FROM16)
+    shadow_frame.SetVRegLong(inst->VRegA_22x(inst_data),
+                             shadow_frame.GetVRegLong(inst->VRegB_22x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MOVE_WIDE_16)
+    shadow_frame.SetVRegLong(inst->VRegA_32x(),
+                             shadow_frame.GetVRegLong(inst->VRegB_32x()));
+    ADVANCE(3);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MOVE_OBJECT)
+    shadow_frame.SetVRegReference(inst->VRegA_12x(inst_data),
+                                  shadow_frame.GetVRegReference(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MOVE_OBJECT_FROM16)
+    shadow_frame.SetVRegReference(inst->VRegA_22x(inst_data),
+                                  shadow_frame.GetVRegReference(inst->VRegB_22x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MOVE_OBJECT_16)
+    shadow_frame.SetVRegReference(inst->VRegA_32x(),
+                                  shadow_frame.GetVRegReference(inst->VRegB_32x()));
+    ADVANCE(3);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MOVE_RESULT)
+    shadow_frame.SetVReg(inst->VRegA_11x(inst_data), result_register.GetI());
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MOVE_RESULT_WIDE)
+    shadow_frame.SetVRegLong(inst->VRegA_11x(inst_data), result_register.GetJ());
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MOVE_RESULT_OBJECT)
+    shadow_frame.SetVRegReference(inst->VRegA_11x(inst_data), result_register.GetL());
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MOVE_EXCEPTION) {
+    Throwable* exception = self->GetException(NULL);
+    self->ClearException();
+    shadow_frame.SetVRegReference(inst->VRegA_11x(inst_data), exception);
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(RETURN_VOID) {
+    JValue result;
+    if (do_access_check) {
+      // If access checks are required then the dex-to-dex compiler and analysis of
+      // whether the class has final fields hasn't been performed. Conservatively
+      // perform the memory barrier now.
+      ANDROID_MEMBAR_STORE();
+    }
+    if (UNLIKELY(self->TestAllFlags())) {
+      CheckSuspend(self);
+    }
+    if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
+      instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc,
+                                       result);
+    }
+    return result;
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(RETURN_VOID_BARRIER) {
+    ANDROID_MEMBAR_STORE();
+    JValue result;
+    if (UNLIKELY(self->TestAllFlags())) {
+      CheckSuspend(self);
+    }
+    if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
+      instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc,
+                                       result);
+    }
+    return result;
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(RETURN) {
+    JValue result;
+    result.SetJ(0);
+    result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data)));
+    if (UNLIKELY(self->TestAllFlags())) {
+      CheckSuspend(self);
+    }
+    if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
+      instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc,
+                                       result);
+    }
+    return result;
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(RETURN_WIDE) {
+    JValue result;
+    result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data)));
+    if (UNLIKELY(self->TestAllFlags())) {
+      CheckSuspend(self);
+    }
+    if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
+      instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc,
+                                       result);
+    }
+    return result;
+  }
+  HANDLE_INSTRUCTION_END();
+
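+  // return-object must re-check assignability when interpreting unverified code
+  // (do_assignability_check mirrors do_access_check): without a verifier run, nothing
+  // has proven that the returned reference matches the declared return type.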
+  HANDLE_INSTRUCTION_START(RETURN_OBJECT) {
+    JValue result;
+    if (UNLIKELY(self->TestAllFlags())) {
+      CheckSuspend(self);
+    }
+    Object* obj_result = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
+    result.SetJ(0);
+    result.SetL(obj_result);
+    if (do_assignability_check && obj_result != NULL) {
+      Class* return_type = MethodHelper(shadow_frame.GetMethod()).GetReturnType();
+      if (return_type == NULL) {
+        // Return the pending exception.
+        HANDLE_PENDING_EXCEPTION();
+      }
+      if (!obj_result->VerifierInstanceOf(return_type)) {
+        // This should never happen.
+        self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
+                                 "Ljava/lang/VirtualMachineError;",
+                                 "Returning '%s' that is not instance of return type '%s'",
+                                 ClassHelper(obj_result->GetClass()).GetDescriptor(),
+                                 ClassHelper(return_type).GetDescriptor());
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+    if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
+      instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc,
+                                       result);
+    }
+    return result;
+  }
+  HANDLE_INSTRUCTION_END();
+
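+  // The const handlers below also null out the parallel reference slot whenever the
+  // constant is zero, since the dex zero constant doubles as the null reference and both
+  // views of the register must stay consistent.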
+  HANDLE_INSTRUCTION_START(CONST_4) {
+    uint32_t dst = inst->VRegA_11n(inst_data);
+    int32_t val = inst->VRegB_11n(inst_data);
+    shadow_frame.SetVReg(dst, val);
+    if (val == 0) {
+      shadow_frame.SetVRegReference(dst, NULL);
+    }
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(CONST_16) {
+    uint32_t dst = inst->VRegA_21s(inst_data);
+    int32_t val = inst->VRegB_21s();
+    shadow_frame.SetVReg(dst, val);
+    if (val == 0) {
+      shadow_frame.SetVRegReference(dst, NULL);
+    }
+    ADVANCE(2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(CONST) {
+    uint32_t dst = inst->VRegA_31i(inst_data);
+    int32_t val = inst->VRegB_31i();
+    shadow_frame.SetVReg(dst, val);
+    if (val == 0) {
+      shadow_frame.SetVRegReference(dst, NULL);
+    }
+    ADVANCE(3);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(CONST_HIGH16) {
+    uint32_t dst = inst->VRegA_21h(inst_data);
+    int32_t val = static_cast<int32_t>(inst->VRegB_21h() << 16);
+    shadow_frame.SetVReg(dst, val);
+    if (val == 0) {
+      shadow_frame.SetVRegReference(dst, NULL);
+    }
+    ADVANCE(2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(CONST_WIDE_16)
+    shadow_frame.SetVRegLong(inst->VRegA_21s(inst_data), inst->VRegB_21s());
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(CONST_WIDE_32)
+    shadow_frame.SetVRegLong(inst->VRegA_31i(inst_data), inst->VRegB_31i());
+    ADVANCE(3);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(CONST_WIDE)
+    shadow_frame.SetVRegLong(inst->VRegA_51l(inst_data), inst->VRegB_51l());
+    ADVANCE(5);
+  HANDLE_INSTRUCTION_END();
+
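+  // const-wide/high16 places the 16-bit literal in the top bits of the 64-bit value;
+  // for example, B = 0x4000 materializes 0x4000000000000000, the raw bit pattern of the
+  // double 2.0.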
+  HANDLE_INSTRUCTION_START(CONST_WIDE_HIGH16)
+    shadow_frame.SetVRegLong(inst->VRegA_21h(inst_data),
+                             static_cast<uint64_t>(inst->VRegB_21h()) << 48);
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(CONST_STRING) {
+    String* s = ResolveString(self, mh, inst->VRegB_21c());
+    if (UNLIKELY(s == NULL)) {
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      shadow_frame.SetVRegReference(inst->VRegA_21c(inst_data), s);
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(CONST_STRING_JUMBO) {
+    String* s = ResolveString(self, mh, inst->VRegB_31c());
+    if (UNLIKELY(s == NULL)) {
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      shadow_frame.SetVRegReference(inst->VRegA_31c(inst_data), s);
+      ADVANCE(3);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(CONST_CLASS) {
+    Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame.GetMethod(),
+                                      self, false, do_access_check);
+    if (UNLIKELY(c == NULL)) {
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      shadow_frame.SetVRegReference(inst->VRegA_21c(inst_data), c);
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MONITOR_ENTER) {
+    Object* obj = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
+    if (UNLIKELY(obj == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      DoMonitorEnter(self, obj);
+      POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), 1);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MONITOR_EXIT) {
+    Object* obj = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
+    if (UNLIKELY(obj == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      DoMonitorExit(self, obj);
+      POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), 1);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(CHECK_CAST) {
+    Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame.GetMethod(),
+                                      self, false, do_access_check);
+    if (UNLIKELY(c == NULL)) {
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      Object* obj = shadow_frame.GetVRegReference(inst->VRegA_21c(inst_data));
+      if (UNLIKELY(obj != NULL && !obj->InstanceOf(c))) {
+        ThrowClassCastException(c, obj->GetClass());
+        HANDLE_PENDING_EXCEPTION();
+      } else {
+        ADVANCE(2);
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INSTANCE_OF) {
+    Class* c = ResolveVerifyAndClinit(inst->VRegC_22c(), shadow_frame.GetMethod(),
+                                      self, false, do_access_check);
+    if (UNLIKELY(c == NULL)) {
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+      shadow_frame.SetVReg(inst->VRegA_22c(inst_data), (obj != NULL && obj->InstanceOf(c)) ? 1 : 0);
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(ARRAY_LENGTH) {
+    Object* array = shadow_frame.GetVRegReference(inst->VRegB_12x(inst_data));
+    if (UNLIKELY(array == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      shadow_frame.SetVReg(inst->VRegA_12x(inst_data), array->AsArray()->GetLength());
+      ADVANCE(1);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(NEW_INSTANCE) {
+    Object* obj = AllocObjectFromCodeInstrumented(inst->VRegB_21c(), shadow_frame.GetMethod(),
+                                                  self, do_access_check);
+    if (UNLIKELY(obj == NULL)) {
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      shadow_frame.SetVRegReference(inst->VRegA_21c(inst_data), obj);
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(NEW_ARRAY) {
+    int32_t length = shadow_frame.GetVReg(inst->VRegB_22c(inst_data));
+    Object* obj = AllocArrayFromCodeInstrumented(inst->VRegC_22c(), shadow_frame.GetMethod(),
+                                                 length, self, do_access_check);
+    if (UNLIKELY(obj == NULL)) {
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      shadow_frame.SetVRegReference(inst->VRegA_22c(inst_data), obj);
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(FILLED_NEW_ARRAY) {
+    bool success = DoFilledNewArray<false, do_access_check>(inst, shadow_frame,
+                                                            self, &result_register);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 3);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(FILLED_NEW_ARRAY_RANGE) {
+    bool success = DoFilledNewArray<true, do_access_check>(inst, shadow_frame,
+                                                           self, &result_register);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 3);
+  }
+  HANDLE_INSTRUCTION_END();
+
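+  // fill-array-data copies from an array-data payload embedded in the code: a 16-bit
+  // ident, a 16-bit element_width, a 32-bit element_count, then the raw elements. The
+  // single memcpy below therefore moves element_count * element_width bytes.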
+  HANDLE_INSTRUCTION_START(FILL_ARRAY_DATA) {
+    Object* obj = shadow_frame.GetVRegReference(inst->VRegA_31t(inst_data));
+    if (UNLIKELY(obj == NULL)) {
+      ThrowNullPointerException(NULL, "null array in FILL_ARRAY_DATA");
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      Array* array = obj->AsArray();
+      DCHECK(array->IsArrayInstance() && !array->IsObjectArray());
+      const uint16_t* payload_addr = reinterpret_cast<const uint16_t*>(inst) + inst->VRegB_31t();
+      const Instruction::ArrayDataPayload* payload =
+          reinterpret_cast<const Instruction::ArrayDataPayload*>(payload_addr);
+      if (UNLIKELY(static_cast<int32_t>(payload->element_count) > array->GetLength())) {
+        self->ThrowNewExceptionF(shadow_frame.GetCurrentLocationForThrow(),
+                                 "Ljava/lang/ArrayIndexOutOfBoundsException;",
+                                 "failed FILL_ARRAY_DATA; length=%d, index=%d",
+                                 array->GetLength(), payload->element_count);
+        HANDLE_PENDING_EXCEPTION();
+      } else {
+        uint32_t size_in_bytes = payload->element_count * payload->element_width;
+        memcpy(array->GetRawData(payload->element_width), payload->data, size_in_bytes);
+        ADVANCE(3);
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(THROW) {
+    Object* exception = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
+    if (UNLIKELY(exception == NULL)) {
+      ThrowNullPointerException(NULL, "throw with null exception");
+    } else if (do_assignability_check && !exception->GetClass()->IsThrowableClass()) {
+      // This should never happen.
+      self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
+                               "Ljava/lang/VirtualMachineError;",
+                               "Throwing '%s' that is not instance of Throwable",
+                               ClassHelper(exception->GetClass()).GetDescriptor());
+    } else {
+      self->SetException(shadow_frame.GetCurrentLocationForThrow(), exception->AsThrowable());
+    }
+    HANDLE_PENDING_EXCEPTION();
+  }
+  HANDLE_INSTRUCTION_END();
+
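+  // The branch handlers below check for suspension and refresh the handler table only on
+  // backward branches. Since every loop contains a backward branch, this bounds how long a
+  // thread can run without reaching a suspend point or noticing newly installed listeners.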
+  HANDLE_INSTRUCTION_START(GOTO) {
+    int8_t offset = inst->VRegA_10t(inst_data);
+    if (IsBackwardBranch(offset)) {
+      if (UNLIKELY(self->TestAllFlags())) {
+        CheckSuspend(self);
+      }
+      UPDATE_HANDLER_TABLE();
+    }
+    ADVANCE(offset);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(GOTO_16) {
+    int16_t offset = inst->VRegA_20t();
+    if (IsBackwardBranch(offset)) {
+      if (UNLIKELY(self->TestAllFlags())) {
+        CheckSuspend(self);
+      }
+      UPDATE_HANDLER_TABLE();
+    }
+    ADVANCE(offset);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(GOTO_32) {
+    int32_t offset = inst->VRegA_30t();
+    if (IsBackwardBranch(offset)) {
+      if (UNLIKELY(self->TestAllFlags())) {
+        CheckSuspend(self);
+      }
+      UPDATE_HANDLER_TABLE();
+    }
+    ADVANCE(offset);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(PACKED_SWITCH) {
+    int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
+    if (IsBackwardBranch(offset)) {
+      if (UNLIKELY(self->TestAllFlags())) {
+        CheckSuspend(self);
+      }
+      UPDATE_HANDLER_TABLE();
+    }
+    ADVANCE(offset);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SPARSE_SWITCH) {
+    int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
+    if (IsBackwardBranch(offset)) {
+      if (UNLIKELY(self->TestAllFlags())) {
+        CheckSuspend(self);
+      }
+      UPDATE_HANDLER_TABLE();
+    }
+    ADVANCE(offset);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(CMPL_FLOAT) {
+    float val1 = shadow_frame.GetVRegFloat(inst->VRegB_23x());
+    float val2 = shadow_frame.GetVRegFloat(inst->VRegC_23x());
+    int32_t result;
+    if (val1 > val2) {
+      result = 1;
+    } else if (val1 == val2) {
+      result = 0;
+    } else {
+      result = -1;
+    }
+    shadow_frame.SetVReg(inst->VRegA_23x(inst_data), result);
+    ADVANCE(2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(CMPG_FLOAT) {
+    float val1 = shadow_frame.GetVRegFloat(inst->VRegB_23x());
+    float val2 = shadow_frame.GetVRegFloat(inst->VRegC_23x());
+    int32_t result;
+    if (val1 < val2) {
+      result = -1;
+    } else if (val1 == val2) {
+      result = 0;
+    } else {
+      result = 1;
+    }
+    shadow_frame.SetVReg(inst->VRegA_23x(inst_data), result);
+    ADVANCE(2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(CMPL_DOUBLE) {
+    double val1 = shadow_frame.GetVRegDouble(inst->VRegB_23x());
+    double val2 = shadow_frame.GetVRegDouble(inst->VRegC_23x());
+    int32_t result;
+    if (val1 > val2) {
+      result = 1;
+    } else if (val1 == val2) {
+      result = 0;
+    } else {
+      result = -1;
+    }
+    shadow_frame.SetVReg(inst->VRegA_23x(inst_data), result);
+    ADVANCE(2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(CMPG_DOUBLE) {
+    double val1 = shadow_frame.GetVRegDouble(inst->VRegB_23x());
+    double val2 = shadow_frame.GetVRegDouble(inst->VRegC_23x());
+    int32_t result;
+    if (val1 < val2) {
+      result = -1;
+    } else if (val1 == val2) {
+      result = 0;
+    } else {
+      result = 1;
+    }
+    shadow_frame.SetVReg(inst->VRegA_23x(inst_data), result);
+    ADVANCE(2);
+  }
+  HANDLE_INSTRUCTION_END();
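+  // In the four handlers above a NaN operand makes every comparison false, so the final
+  // else branch supplies the bias: cmpl-* yields -1 on NaN while cmpg-* yields 1, as the
+  // dex comparison semantics require.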
+
+  HANDLE_INSTRUCTION_START(CMP_LONG) {
+    int64_t val1 = shadow_frame.GetVRegLong(inst->VRegB_23x());
+    int64_t val2 = shadow_frame.GetVRegLong(inst->VRegC_23x());
+    int32_t result;
+    if (val1 > val2) {
+      result = 1;
+    } else if (val1 == val2) {
+      result = 0;
+    } else {
+      result = -1;
+    }
+    shadow_frame.SetVReg(inst->VRegA_23x(inst_data), result);
+    ADVANCE(2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IF_EQ) {
+    if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) == shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
+      int16_t offset = inst->VRegC_22t();
+      if (IsBackwardBranch(offset)) {
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        UPDATE_HANDLER_TABLE();
+      }
+      ADVANCE(offset);
+    } else {
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IF_NE) {
+    if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) != shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
+      int16_t offset = inst->VRegC_22t();
+      if (IsBackwardBranch(offset)) {
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        UPDATE_HANDLER_TABLE();
+      }
+      ADVANCE(offset);
+    } else {
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IF_LT) {
+    if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) < shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
+      int16_t offset = inst->VRegC_22t();
+      if (IsBackwardBranch(offset)) {
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        UPDATE_HANDLER_TABLE();
+      }
+      ADVANCE(offset);
+    } else {
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IF_GE) {
+    if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >= shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
+      int16_t offset = inst->VRegC_22t();
+      if (IsBackwardBranch(offset)) {
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        UPDATE_HANDLER_TABLE();
+      }
+      ADVANCE(offset);
+    } else {
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IF_GT) {
+    if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) > shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
+      int16_t offset = inst->VRegC_22t();
+      if (IsBackwardBranch(offset)) {
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        UPDATE_HANDLER_TABLE();
+      }
+      ADVANCE(offset);
+    } else {
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IF_LE) {
+    if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <= shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
+      int16_t offset = inst->VRegC_22t();
+      if (IsBackwardBranch(offset)) {
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        UPDATE_HANDLER_TABLE();
+      }
+      ADVANCE(offset);
+    } else {
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IF_EQZ) {
+    if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
+      int16_t offset = inst->VRegB_21t();
+      if (IsBackwardBranch(offset)) {
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        UPDATE_HANDLER_TABLE();
+      }
+      ADVANCE(offset);
+    } else {
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IF_NEZ) {
+    if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
+      int16_t offset = inst->VRegB_21t();
+      if (IsBackwardBranch(offset)) {
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        UPDATE_HANDLER_TABLE();
+      }
+      ADVANCE(offset);
+    } else {
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IF_LTZ) {
+    if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
+      int16_t offset = inst->VRegB_21t();
+      if (IsBackwardBranch(offset)) {
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        UPDATE_HANDLER_TABLE();
+      }
+      ADVANCE(offset);
+    } else {
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IF_GEZ) {
+    if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
+      int16_t offset = inst->VRegB_21t();
+      if (IsBackwardBranch(offset)) {
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        UPDATE_HANDLER_TABLE();
+      }
+      ADVANCE(offset);
+    } else {
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IF_GTZ) {
+    if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
+      int16_t offset = inst->VRegB_21t();
+      if (IsBackwardBranch(offset)) {
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        UPDATE_HANDLER_TABLE();
+      }
+      ADVANCE(offset);
+    } else {
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IF_LEZ) {
+    if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
+      int16_t offset = inst->VRegB_21t();
+      if (IsBackwardBranch(offset)) {
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        UPDATE_HANDLER_TABLE();
+      }
+      ADVANCE(offset);
+    } else {
+      ADVANCE(2);
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(AGET_BOOLEAN) {
+    Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+    if (UNLIKELY(a == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+      BooleanArray* array = a->AsBooleanArray();
+      if (LIKELY(array->IsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        ADVANCE(2);
+      } else {
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(AGET_BYTE) {
+    Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+    if (UNLIKELY(a == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+      ByteArray* array = a->AsByteArray();
+      if (LIKELY(array->IsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        ADVANCE(2);
+      } else {
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(AGET_CHAR) {
+    Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+    if (UNLIKELY(a == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+      CharArray* array = a->AsCharArray();
+      if (LIKELY(array->IsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        ADVANCE(2);
+      } else {
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(AGET_SHORT) {
+    Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+    if (UNLIKELY(a == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+      ShortArray* array = a->AsShortArray();
+      if (LIKELY(array->IsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        ADVANCE(2);
+      } else {
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(AGET) {
+    Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+    if (UNLIKELY(a == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+      IntArray* array = a->AsIntArray();
+      if (LIKELY(array->IsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        ADVANCE(2);
+      } else {
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(AGET_WIDE) {
+    Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+    if (UNLIKELY(a == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+      LongArray* array = a->AsLongArray();
+      if (LIKELY(array->IsValidIndex(index))) {
+        shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        ADVANCE(2);
+      } else {
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(AGET_OBJECT) {
+    Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+    if (UNLIKELY(a == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+      ObjectArray<Object>* array = a->AsObjectArray<Object>();
+      if (LIKELY(array->IsValidIndex(index))) {
+        shadow_frame.SetVRegReference(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
+        ADVANCE(2);
+      } else {
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(APUT_BOOLEAN) {
+    Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+    if (UNLIKELY(a == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      uint8_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
+      int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+      BooleanArray* array = a->AsBooleanArray();
+      if (LIKELY(array->IsValidIndex(index))) {
+        array->GetData()[index] = val;
+        ADVANCE(2);
+      } else {
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(APUT_BYTE) {
+    Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+    if (UNLIKELY(a == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      int8_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
+      int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+      ByteArray* array = a->AsByteArray();
+      if (LIKELY(array->IsValidIndex(index))) {
+        array->GetData()[index] = val;
+        ADVANCE(2);
+      } else {
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(APUT_CHAR) {
+    Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+    if (UNLIKELY(a == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      uint16_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
+      int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+      CharArray* array = a->AsCharArray();
+      if (LIKELY(array->IsValidIndex(index))) {
+        array->GetData()[index] = val;
+        ADVANCE(2);
+      } else {
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(APUT_SHORT) {
+    Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+    if (UNLIKELY(a == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      int16_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
+      int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+      ShortArray* array = a->AsShortArray();
+      if (LIKELY(array->IsValidIndex(index))) {
+        array->GetData()[index] = val;
+        ADVANCE(2);
+      } else {
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(APUT) {
+    Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+    if (UNLIKELY(a == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      int32_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
+      int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+      IntArray* array = a->AsIntArray();
+      if (LIKELY(array->IsValidIndex(index))) {
+        array->GetData()[index] = val;
+        ADVANCE(2);
+      } else {
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(APUT_WIDE) {
+    Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+    if (UNLIKELY(a == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      int64_t val = shadow_frame.GetVRegLong(inst->VRegA_23x(inst_data));
+      int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+      LongArray* array = a->AsLongArray();
+      if (LIKELY(array->IsValidIndex(index))) {
+        array->GetData()[index] = val;
+        ADVANCE(2);
+      } else {
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
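+  // Unlike the primitive aput handlers, aput-object must also check that the stored
+  // reference is assignment-compatible with the array's component type; when
+  // CheckAssignable fails it leaves the pending ArrayStoreException for the else branch.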
+  HANDLE_INSTRUCTION_START(APUT_OBJECT) {
+    Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+    if (UNLIKELY(a == NULL)) {
+      ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+      HANDLE_PENDING_EXCEPTION();
+    } else {
+      int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+      Object* val = shadow_frame.GetVRegReference(inst->VRegA_23x(inst_data));
+      ObjectArray<Object>* array = a->AsObjectArray<Object>();
+      if (LIKELY(array->IsValidIndex(index) && array->CheckAssignable(val))) {
+        array->SetWithoutChecks(index, val);
+        ADVANCE(2);
+      } else {
+        HANDLE_PENDING_EXCEPTION();
+      }
+    }
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IGET_BOOLEAN) {
+    bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimBoolean, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IGET_BYTE) {
+    bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimByte, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IGET_CHAR) {
+    bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimChar, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IGET_SHORT) {
+    bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimShort, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IGET) {
+    bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimInt, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IGET_WIDE) {
+    bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimLong, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IGET_OBJECT) {
+    bool success = DoFieldGet<InstanceObjectRead, Primitive::kPrimNot, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
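+  // The *-quick opcodes are produced by dex-to-dex quickening: the field reference has
+  // already been resolved to a raw object offset, so the quick handlers skip field
+  // resolution and access checks entirely.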
+  HANDLE_INSTRUCTION_START(IGET_QUICK) {
+    bool success = DoIGetQuick<Primitive::kPrimInt>(shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IGET_WIDE_QUICK) {
+    bool success = DoIGetQuick<Primitive::kPrimLong>(shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IGET_OBJECT_QUICK) {
+    bool success = DoIGetQuick<Primitive::kPrimNot>(shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SGET_BOOLEAN) {
+    bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimBoolean, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SGET_BYTE) {
+    bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimByte, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SGET_CHAR) {
+    bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimChar, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SGET_SHORT) {
+    bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimShort, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SGET) {
+    bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimInt, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SGET_WIDE) {
+    bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimLong, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SGET_OBJECT) {
+    bool success = DoFieldGet<StaticObjectRead, Primitive::kPrimNot, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IPUT_BOOLEAN) {
+    bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimBoolean, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IPUT_BYTE) {
+    bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimByte, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IPUT_CHAR) {
+    bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimChar, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IPUT_SHORT) {
+    bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimShort, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IPUT) {
+    bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimInt, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IPUT_WIDE) {
+    bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimLong, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IPUT_OBJECT) {
+    bool success = DoFieldPut<InstanceObjectWrite, Primitive::kPrimNot, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IPUT_QUICK) {
+    bool success = DoIPutQuick<Primitive::kPrimInt>(shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IPUT_WIDE_QUICK) {
+    bool success = DoIPutQuick<Primitive::kPrimLong>(shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(IPUT_OBJECT_QUICK) {
+    bool success = DoIPutQuick<Primitive::kPrimNot>(shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SPUT_BOOLEAN) {
+    bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimBoolean, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SPUT_BYTE) {
+    bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimByte, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SPUT_CHAR) {
+    bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimChar, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SPUT_SHORT) {
+    bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimShort, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SPUT) {
+    bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimInt, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SPUT_WIDE) {
+    bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimLong, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SPUT_OBJECT) {
+    bool success = DoFieldPut<StaticObjectWrite, Primitive::kPrimNot, do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
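+  // Every invoke handler calls UPDATE_HANDLER_TABLE() after the call returns, since
+  // instrumentation (e.g. a debugger installing dex pc listeners) may have changed while
+  // the callee ran; the dispatch table must be re-selected before advancing.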
+  HANDLE_INSTRUCTION_START(INVOKE_VIRTUAL) {
+    bool success = DoInvoke<kVirtual, false, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+    UPDATE_HANDLER_TABLE();
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 3);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INVOKE_VIRTUAL_RANGE) {
+    bool success = DoInvoke<kVirtual, true, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+    UPDATE_HANDLER_TABLE();
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 3);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INVOKE_SUPER) {
+    bool success = DoInvoke<kSuper, false, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+    UPDATE_HANDLER_TABLE();
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 3);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INVOKE_SUPER_RANGE) {
+    bool success = DoInvoke<kSuper, true, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+    UPDATE_HANDLER_TABLE();
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 3);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INVOKE_DIRECT) {
+    bool success = DoInvoke<kDirect, false, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+    UPDATE_HANDLER_TABLE();
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 3);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INVOKE_DIRECT_RANGE) {
+    bool success = DoInvoke<kDirect, true, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+    UPDATE_HANDLER_TABLE();
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 3);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INVOKE_INTERFACE) {
+    bool success = DoInvoke<kInterface, false, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+    UPDATE_HANDLER_TABLE();
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 3);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INVOKE_INTERFACE_RANGE) {
+    bool success = DoInvoke<kInterface, true, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+    UPDATE_HANDLER_TABLE();
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 3);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INVOKE_STATIC) {
+    bool success = DoInvoke<kStatic, false, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+    UPDATE_HANDLER_TABLE();
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 3);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INVOKE_STATIC_RANGE) {
+    bool success = DoInvoke<kStatic, true, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+    UPDATE_HANDLER_TABLE();
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 3);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INVOKE_VIRTUAL_QUICK) {
+    bool success = DoInvokeVirtualQuick<false>(self, shadow_frame, inst, inst_data, &result_register);
+    UPDATE_HANDLER_TABLE();
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 3);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INVOKE_VIRTUAL_RANGE_QUICK) {
+    bool success = DoInvokeVirtualQuick<true>(self, shadow_frame, inst, inst_data, &result_register);
+    UPDATE_HANDLER_TABLE();
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 3);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(NEG_INT)
+    shadow_frame.SetVReg(inst->VRegA_12x(inst_data), -shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(NOT_INT)
+    shadow_frame.SetVReg(inst->VRegA_12x(inst_data), ~shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(NEG_LONG)
+    shadow_frame.SetVRegLong(inst->VRegA_12x(inst_data), -shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(NOT_LONG)
+    shadow_frame.SetVRegLong(inst->VRegA_12x(inst_data), ~shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(NEG_FLOAT)
+    shadow_frame.SetVRegFloat(inst->VRegA_12x(inst_data), -shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(NEG_DOUBLE)
+    shadow_frame.SetVRegDouble(inst->VRegA_12x(inst_data), -shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INT_TO_LONG)
+    shadow_frame.SetVRegLong(inst->VRegA_12x(inst_data), shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INT_TO_FLOAT)
+    shadow_frame.SetVRegFloat(inst->VRegA_12x(inst_data), shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INT_TO_DOUBLE)
+    shadow_frame.SetVRegDouble(inst->VRegA_12x(inst_data), shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(LONG_TO_INT)
+    shadow_frame.SetVReg(inst->VRegA_12x(inst_data), shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(LONG_TO_FLOAT)
+    shadow_frame.SetVRegFloat(inst->VRegA_12x(inst_data), shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(LONG_TO_DOUBLE)
+    shadow_frame.SetVRegDouble(inst->VRegA_12x(inst_data), shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(FLOAT_TO_INT) {
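+    // art_float_to_integral applies the Java-defined conversion: NaN maps to 0
+    // and out-of-range values saturate to the target type's min/max.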
+    float val = shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data));
+    int32_t result = art_float_to_integral<int32_t, float>(val);
+    shadow_frame.SetVReg(inst->VRegA_12x(inst_data), result);
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(FLOAT_TO_LONG) {
+    float val = shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data));
+    int64_t result = art_float_to_integral<int64_t, float>(val);
+    shadow_frame.SetVRegLong(inst->VRegA_12x(inst_data), result);
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(FLOAT_TO_DOUBLE)
+    shadow_frame.SetVRegDouble(inst->VRegA_12x(inst_data), shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(DOUBLE_TO_INT) {
+    double val = shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data));
+    int32_t result = art_float_to_integral<int32_t, double>(val);
+    shadow_frame.SetVReg(inst->VRegA_12x(inst_data), result);
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(DOUBLE_TO_LONG) {
+    double val = shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data));
+    int64_t result = art_float_to_integral<int64_t, double>(val);
+    shadow_frame.SetVRegLong(inst->VRegA_12x(inst_data), result);
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(DOUBLE_TO_FLOAT)
+    shadow_frame.SetVRegFloat(inst->VRegA_12x(inst_data), shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INT_TO_BYTE)
+    shadow_frame.SetVReg(inst->VRegA_12x(inst_data),
+                         static_cast<int8_t>(shadow_frame.GetVReg(inst->VRegB_12x(inst_data))));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INT_TO_CHAR)
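+    // char is Java's only unsigned primitive, so this conversion zero-extends
+    // (uint16_t), while INT_TO_BYTE/INT_TO_SHORT sign-extend.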
+    shadow_frame.SetVReg(inst->VRegA_12x(inst_data),
+                         static_cast<uint16_t>(shadow_frame.GetVReg(inst->VRegB_12x(inst_data))));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(INT_TO_SHORT)
+    shadow_frame.SetVReg(inst->VRegA_12x(inst_data),
+                         static_cast<int16_t>(shadow_frame.GetVReg(inst->VRegB_12x(inst_data))));
+    ADVANCE(1);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(ADD_INT)
+    shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_23x()) +
+                         shadow_frame.GetVReg(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SUB_INT)
+    shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_23x()) -
+                         shadow_frame.GetVReg(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MUL_INT)
+    shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_23x()) *
+                         shadow_frame.GetVReg(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(DIV_INT) {
+    bool success = DoIntDivide(shadow_frame, inst->VRegA_23x(inst_data),
+                               shadow_frame.GetVReg(inst->VRegB_23x()),
+                               shadow_frame.GetVReg(inst->VRegC_23x()));
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(REM_INT) {
+    bool success = DoIntRemainder(shadow_frame, inst->VRegA_23x(inst_data),
+                                  shadow_frame.GetVReg(inst->VRegB_23x()),
+                                  shadow_frame.GetVReg(inst->VRegC_23x()));
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SHL_INT)
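+    // As in Java, only the low five bits of the shift distance apply to 32-bit
+    // shifts (0x1f mask); the 64-bit variants below mask with 0x3f instead.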
+    shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_23x()) <<
+                         (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x1f));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SHR_INT)
+    shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_23x()) >>
+                         (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x1f));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(USHR_INT)
+    shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                         static_cast<uint32_t>(shadow_frame.GetVReg(inst->VRegB_23x())) >>
+                         (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x1f));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(AND_INT)
+    shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_23x()) &
+                         shadow_frame.GetVReg(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(OR_INT)
+    shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_23x()) |
+                         shadow_frame.GetVReg(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(XOR_INT)
+    shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_23x()) ^
+                         shadow_frame.GetVReg(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(ADD_LONG)
+    shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVRegLong(inst->VRegB_23x()) +
+                             shadow_frame.GetVRegLong(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SUB_LONG)
+    shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVRegLong(inst->VRegB_23x()) -
+                             shadow_frame.GetVRegLong(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MUL_LONG)
+    shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVRegLong(inst->VRegB_23x()) *
+                             shadow_frame.GetVRegLong(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(DIV_LONG) {
+    bool success = DoLongDivide(shadow_frame, inst->VRegA_23x(inst_data),
+                                shadow_frame.GetVRegLong(inst->VRegB_23x()),
+                                shadow_frame.GetVRegLong(inst->VRegC_23x()));
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(REM_LONG) {
+    bool success = DoLongRemainder(shadow_frame, inst->VRegA_23x(inst_data),
+                                   shadow_frame.GetVRegLong(inst->VRegB_23x()),
+                                   shadow_frame.GetVRegLong(inst->VRegC_23x()));
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(AND_LONG)
+    shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVRegLong(inst->VRegB_23x()) &
+                             shadow_frame.GetVRegLong(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(OR_LONG)
+    shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVRegLong(inst->VRegB_23x()) |
+                             shadow_frame.GetVRegLong(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(XOR_LONG)
+    shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVRegLong(inst->VRegB_23x()) ^
+                             shadow_frame.GetVRegLong(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SHL_LONG)
+    shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVRegLong(inst->VRegB_23x()) <<
+                             (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x3f));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SHR_LONG)
+    shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVRegLong(inst->VRegB_23x()) >>
+                             (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x3f));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(USHR_LONG)
+    shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                             static_cast<uint64_t>(shadow_frame.GetVRegLong(inst->VRegB_23x())) >>
+                             (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x3f));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(ADD_FLOAT)
+    shadow_frame.SetVRegFloat(inst->VRegA_23x(inst_data),
+                              shadow_frame.GetVRegFloat(inst->VRegB_23x()) +
+                              shadow_frame.GetVRegFloat(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SUB_FLOAT)
+    shadow_frame.SetVRegFloat(inst->VRegA_23x(inst_data),
+                              shadow_frame.GetVRegFloat(inst->VRegB_23x()) -
+                              shadow_frame.GetVRegFloat(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MUL_FLOAT)
+    shadow_frame.SetVRegFloat(inst->VRegA_23x(inst_data),
+                              shadow_frame.GetVRegFloat(inst->VRegB_23x()) *
+                              shadow_frame.GetVRegFloat(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(DIV_FLOAT)
+    shadow_frame.SetVRegFloat(inst->VRegA_23x(inst_data),
+                              shadow_frame.GetVRegFloat(inst->VRegB_23x()) /
+                              shadow_frame.GetVRegFloat(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(REM_FLOAT)
+    shadow_frame.SetVRegFloat(inst->VRegA_23x(inst_data),
+                              fmodf(shadow_frame.GetVRegFloat(inst->VRegB_23x()),
+                                    shadow_frame.GetVRegFloat(inst->VRegC_23x())));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(ADD_DOUBLE)
+    shadow_frame.SetVRegDouble(inst->VRegA_23x(inst_data),
+                               shadow_frame.GetVRegDouble(inst->VRegB_23x()) +
+                               shadow_frame.GetVRegDouble(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SUB_DOUBLE)
+    shadow_frame.SetVRegDouble(inst->VRegA_23x(inst_data),
+                               shadow_frame.GetVRegDouble(inst->VRegB_23x()) -
+                               shadow_frame.GetVRegDouble(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MUL_DOUBLE)
+    shadow_frame.SetVRegDouble(inst->VRegA_23x(inst_data),
+                               shadow_frame.GetVRegDouble(inst->VRegB_23x()) *
+                               shadow_frame.GetVRegDouble(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(DIV_DOUBLE)
+    shadow_frame.SetVRegDouble(inst->VRegA_23x(inst_data),
+                               shadow_frame.GetVRegDouble(inst->VRegB_23x()) /
+                               shadow_frame.GetVRegDouble(inst->VRegC_23x()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(REM_DOUBLE)
+    shadow_frame.SetVRegDouble(inst->VRegA_23x(inst_data),
+                               fmod(shadow_frame.GetVRegDouble(inst->VRegB_23x()),
+                                    shadow_frame.GetVRegDouble(inst->VRegC_23x())));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(ADD_INT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVReg(vregA,
+                         shadow_frame.GetVReg(vregA) +
+                         shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SUB_INT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVReg(vregA,
+                         shadow_frame.GetVReg(vregA) -
+                         shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MUL_INT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVReg(vregA,
+                         shadow_frame.GetVReg(vregA) *
+                         shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(DIV_INT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    bool success = DoIntDivide(shadow_frame, vregA, shadow_frame.GetVReg(vregA),
+                               shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(REM_INT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    bool success = DoIntRemainder(shadow_frame, vregA, shadow_frame.GetVReg(vregA),
+                                  shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SHL_INT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVReg(vregA,
+                         shadow_frame.GetVReg(vregA) <<
+                         (shadow_frame.GetVReg(inst->VRegB_12x(inst_data)) & 0x1f));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SHR_INT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVReg(vregA,
+                         shadow_frame.GetVReg(vregA) >>
+                         (shadow_frame.GetVReg(inst->VRegB_12x(inst_data)) & 0x1f));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(USHR_INT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVReg(vregA,
+                         static_cast<uint32_t>(shadow_frame.GetVReg(vregA)) >>
+                         (shadow_frame.GetVReg(inst->VRegB_12x(inst_data)) & 0x1f));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(AND_INT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVReg(vregA,
+                         shadow_frame.GetVReg(vregA) &
+                         shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(OR_INT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVReg(vregA,
+                         shadow_frame.GetVReg(vregA) |
+                         shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(XOR_INT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVReg(vregA,
+                         shadow_frame.GetVReg(vregA) ^
+                         shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(ADD_LONG_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegLong(vregA,
+                             shadow_frame.GetVRegLong(vregA) +
+                             shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SUB_LONG_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegLong(vregA,
+                             shadow_frame.GetVRegLong(vregA) -
+                             shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MUL_LONG_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegLong(vregA,
+                             shadow_frame.GetVRegLong(vregA) *
+                             shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(DIV_LONG_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    bool success = DoLongDivide(shadow_frame, vregA, shadow_frame.GetVRegLong(vregA),
+                                shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(REM_LONG_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    bool success = DoLongRemainder(shadow_frame, vregA, shadow_frame.GetVRegLong(vregA),
+                                   shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(AND_LONG_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegLong(vregA,
+                             shadow_frame.GetVRegLong(vregA) &
+                             shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(OR_LONG_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegLong(vregA,
+                             shadow_frame.GetVRegLong(vregA) |
+                             shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(XOR_LONG_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegLong(vregA,
+                             shadow_frame.GetVRegLong(vregA) ^
+                             shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SHL_LONG_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegLong(vregA,
+                             shadow_frame.GetVRegLong(vregA) <<
+                             (shadow_frame.GetVReg(inst->VRegB_12x(inst_data)) & 0x3f));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SHR_LONG_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegLong(vregA,
+                             shadow_frame.GetVRegLong(vregA) >>
+                             (shadow_frame.GetVReg(inst->VRegB_12x(inst_data)) & 0x3f));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(USHR_LONG_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegLong(vregA,
+                             static_cast<uint64_t>(shadow_frame.GetVRegLong(vregA)) >>
+                             (shadow_frame.GetVReg(inst->VRegB_12x(inst_data)) & 0x3f));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(ADD_FLOAT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegFloat(vregA,
+                              shadow_frame.GetVRegFloat(vregA) +
+                              shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SUB_FLOAT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegFloat(vregA,
+                              shadow_frame.GetVRegFloat(vregA) -
+                              shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MUL_FLOAT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegFloat(vregA,
+                              shadow_frame.GetVRegFloat(vregA) *
+                              shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(DIV_FLOAT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegFloat(vregA,
+                              shadow_frame.GetVRegFloat(vregA) /
+                              shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(REM_FLOAT_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegFloat(vregA,
+                              fmodf(shadow_frame.GetVRegFloat(vregA),
+                                    shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data))));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(ADD_DOUBLE_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegDouble(vregA,
+                               shadow_frame.GetVRegDouble(vregA) +
+                               shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SUB_DOUBLE_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegDouble(vregA,
+                               shadow_frame.GetVRegDouble(vregA) -
+                               shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MUL_DOUBLE_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegDouble(vregA,
+                               shadow_frame.GetVRegDouble(vregA) *
+                               shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(DIV_DOUBLE_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegDouble(vregA,
+                               shadow_frame.GetVRegDouble(vregA) /
+                               shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data)));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(REM_DOUBLE_2ADDR) {
+    uint32_t vregA = inst->VRegA_12x(inst_data);
+    shadow_frame.SetVRegDouble(vregA,
+                               fmod(shadow_frame.GetVRegDouble(vregA),
+                                    shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data))));
+    ADVANCE(1);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(ADD_INT_LIT16)
+    shadow_frame.SetVReg(inst->VRegA_22s(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_22s(inst_data)) +
+                         inst->VRegC_22s());
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(RSUB_INT)
+    shadow_frame.SetVReg(inst->VRegA_22s(inst_data),
+                         inst->VRegC_22s() -
+                         shadow_frame.GetVReg(inst->VRegB_22s(inst_data)));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MUL_INT_LIT16)
+    shadow_frame.SetVReg(inst->VRegA_22s(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_22s(inst_data)) *
+                         inst->VRegC_22s());
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(DIV_INT_LIT16) {
+    bool success = DoIntDivide(shadow_frame, inst->VRegA_22s(inst_data),
+                               shadow_frame.GetVReg(inst->VRegB_22s(inst_data)), inst->VRegC_22s());
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(REM_INT_LIT16) {
+    bool success = DoIntRemainder(shadow_frame, inst->VRegA_22s(inst_data),
+                                  shadow_frame.GetVReg(inst->VRegB_22s(inst_data)), inst->VRegC_22s());
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(AND_INT_LIT16)
+    shadow_frame.SetVReg(inst->VRegA_22s(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_22s(inst_data)) &
+                         inst->VRegC_22s());
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(OR_INT_LIT16)
+    shadow_frame.SetVReg(inst->VRegA_22s(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_22s(inst_data)) |
+                         inst->VRegC_22s());
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(XOR_INT_LIT16)
+    shadow_frame.SetVReg(inst->VRegA_22s(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_22s(inst_data)) ^
+                         inst->VRegC_22s());
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(ADD_INT_LIT8)
+    shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_22b()) +
+                         inst->VRegC_22b());
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(RSUB_INT_LIT8)
+    shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                         inst->VRegC_22b() -
+                         shadow_frame.GetVReg(inst->VRegB_22b()));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(MUL_INT_LIT8)
+    shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_22b()) *
+                         inst->VRegC_22b());
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(DIV_INT_LIT8) {
+    bool success = DoIntDivide(shadow_frame, inst->VRegA_22b(inst_data),
+                               shadow_frame.GetVReg(inst->VRegB_22b()), inst->VRegC_22b());
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(REM_INT_LIT8) {
+    bool success = DoIntRemainder(shadow_frame, inst->VRegA_22b(inst_data),
+                                  shadow_frame.GetVReg(inst->VRegB_22b()), inst->VRegC_22b());
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(AND_INT_LIT8)
+    shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_22b()) &
+                         inst->VRegC_22b());
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(OR_INT_LIT8)
+    shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_22b()) |
+                         inst->VRegC_22b());
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(XOR_INT_LIT8)
+    shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_22b()) ^
+                         inst->VRegC_22b());
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SHL_INT_LIT8)
+    shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_22b()) <<
+                         (inst->VRegC_22b() & 0x1f));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(SHR_INT_LIT8)
+    shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                         shadow_frame.GetVReg(inst->VRegB_22b()) >>
+                         (inst->VRegC_22b() & 0x1f));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(USHR_INT_LIT8)
+    shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                         static_cast<uint32_t>(shadow_frame.GetVReg(inst->VRegB_22b())) >>
+                         (inst->VRegC_22b() & 0x1f));
+    ADVANCE(2);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_3E)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_3F)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_40)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_41)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_42)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_43)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_79)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_7A)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_EB)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_EC)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_ED)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_EE)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_EF)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F0)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F1)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F2)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F3)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F4)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F5)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F6)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F7)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F8)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F9)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_FA)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_FB)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_FC)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_FD)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_FE)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_FF)
+    UnexpectedOpcode(inst, mh);
+  HANDLE_INSTRUCTION_END();
+
+  exception_pending_label: {
+    CHECK(self->IsExceptionPending());
+    if (UNLIKELY(self->TestAllFlags())) {
+      CheckSuspend(self);
+    }
+    Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);
+    uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame, dex_pc,
+                                                                  this_object,
+                                                                  instrumentation);
+    if (found_dex_pc == DexFile::kDexNoIndex) {
+      return JValue(); /* Handled in caller. */
+    } else {
+      int32_t displacement = static_cast<int32_t>(found_dex_pc) - static_cast<int32_t>(dex_pc);
+      ADVANCE(displacement);
+    }
+  }
+
+  // Create alternative instruction handlers dedicated to instrumentation.
+#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v)                        \
+  instrumentation_op_##code: {                                                                \
+    instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),  \
+                                     shadow_frame.GetMethod(), dex_pc);                       \
+    goto *handlersTable[Instruction::code];                                                   \
+  }
+#include "dex_instruction_list.h"
+      DEX_INSTRUCTION_LIST(INSTRUMENTATION_INSTRUCTION_HANDLER)
+#undef DEX_INSTRUCTION_LIST
+#undef INSTRUMENTATION_INSTRUCTION_HANDLER
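+  // Dispatch sketch (illustrative names, not the exact ART macros): the two
+  // goto tables make instrumentation nearly free when disabled, because
+  // installing a DexPcMoved listener only changes which table
+  // UPDATE_HANDLER_TABLE selects; each instrumentation stub reports the event
+  // and then tail-jumps to the regular handler through handlersTable:
+  //
+  //   static const void* const handlersTable[] = { &&op_NOP, ... };
+  //   static const void* const instrumentationTable[] = { &&instrumentation_op_NOP, ... };
+  //   currentHandlersTable = has_listeners ? instrumentationTable : handlersTable;
+  //   goto *currentHandlersTable[inst->Opcode(inst_data)];  // per instruction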
+}  // NOLINT(readability/fn_size)
+
+// Explicit instantiations of ExecuteGotoImpl for the access-check and
+// no-access-check variants.
+template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) HOT_ATTR
+JValue ExecuteGotoImpl<true>(Thread* self, MethodHelper& mh,
+                             const DexFile::CodeItem* code_item,
+                             ShadowFrame& shadow_frame, JValue result_register);
+template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) HOT_ATTR
+JValue ExecuteGotoImpl<false>(Thread* self, MethodHelper& mh,
+                              const DexFile::CodeItem* code_item,
+                              ShadowFrame& shadow_frame, JValue result_register);
+
+}  // namespace interpreter
+}  // namespace art
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
new file mode 100644
index 0000000..bd0d87e
--- /dev/null
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -0,0 +1,2147 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "interpreter_common.h"
+
+namespace art {
+namespace interpreter {
+
+#define HANDLE_PENDING_EXCEPTION()                                                              \
+  do {                                                                                          \
+    CHECK(self->IsExceptionPending());                                                          \
+    if (UNLIKELY(self->TestAllFlags())) {                                                       \
+      CheckSuspend(self);                                                                       \
+    }                                                                                           \
+    Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);                     \
+    uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame,           \
+                                                                  inst->GetDexPc(insns),        \
+                                                                  this_object,                  \
+                                                                  instrumentation);             \
+    if (found_dex_pc == DexFile::kDexNoIndex) {                                                 \
+      return JValue(); /* Handled in caller. */                                                 \
+    } else {                                                                                    \
+      int32_t displacement = static_cast<int32_t>(found_dex_pc) - static_cast<int32_t>(dex_pc); \
+      inst = inst->RelativeAt(displacement);                                                    \
+    }                                                                                           \
+  } while (false)
+
+#define POSSIBLY_HANDLE_PENDING_EXCEPTION(_is_exception_pending, _next_function)  \
+  do {                                                                            \
+    if (UNLIKELY(_is_exception_pending)) {                                        \
+      HANDLE_PENDING_EXCEPTION();                                                 \
+    } else {                                                                      \
+      inst = inst->_next_function();                                              \
+    }                                                                             \
+  } while (false)
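+// Typical use in the opcode handlers (sketch; vregA/dividend/divisor are
+// illustrative locals): run the helper, then either advance past the
+// instruction or divert to the catch handler via HANDLE_PENDING_EXCEPTION:
+//
+//   bool success = DoIntDivide(shadow_frame, vregA, dividend, divisor);
+//   POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);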
+
+// Code to run before each dex instruction.
+#define PREAMBLE()
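+// PREAMBLE is intentionally empty here; keeping the hook in every case leaves
+// a single point for injecting per-instruction work. For example (illustrative
+// only, not part of this change), a debugging build could use:
+//
+//   #define PREAMBLE() LOG(INFO) << inst->DumpString(&mh.GetDexFile())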
+
+template<bool do_access_check>
+JValue ExecuteSwitchImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
+                                ShadowFrame& shadow_frame, JValue result_register) {
+  bool do_assignability_check = do_access_check;
+  if (UNLIKELY(!shadow_frame.HasReferenceArray())) {
+    LOG(FATAL) << "Invalid shadow frame for interpreter use";
+    return JValue();
+  }
+  self->VerifyStack();
+
+  uint32_t dex_pc = shadow_frame.GetDexPC();
+  const instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing.
+    if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
+      instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                        shadow_frame.GetMethod(), 0);
+    }
+  }
+  const uint16_t* const insns = code_item->insns_;
+  const Instruction* inst = Instruction::At(insns + dex_pc);
+  uint16_t inst_data;
+  while (true) {
+    dex_pc = inst->GetDexPc(insns);
+    shadow_frame.SetDexPC(dex_pc);
+    if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc);
+    }
+    TraceExecution(shadow_frame, inst, dex_pc, mh);
+    inst_data = inst->Fetch16(0);
+    switch (inst->Opcode(inst_data)) {
+      case Instruction::NOP:
+        PREAMBLE();
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::MOVE:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_12x(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::MOVE_FROM16:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22x(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_22x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::MOVE_16:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_32x(),
+                             shadow_frame.GetVReg(inst->VRegB_32x()));
+        inst = inst->Next_3xx();
+        break;
+      case Instruction::MOVE_WIDE:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_12x(inst_data),
+                                 shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::MOVE_WIDE_FROM16:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_22x(inst_data),
+                                 shadow_frame.GetVRegLong(inst->VRegB_22x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::MOVE_WIDE_16:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_32x(),
+                                 shadow_frame.GetVRegLong(inst->VRegB_32x()));
+        inst = inst->Next_3xx();
+        break;
+      case Instruction::MOVE_OBJECT:
+        PREAMBLE();
+        shadow_frame.SetVRegReference(inst->VRegA_12x(inst_data),
+                                      shadow_frame.GetVRegReference(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::MOVE_OBJECT_FROM16:
+        PREAMBLE();
+        shadow_frame.SetVRegReference(inst->VRegA_22x(inst_data),
+                                      shadow_frame.GetVRegReference(inst->VRegB_22x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::MOVE_OBJECT_16:
+        PREAMBLE();
+        shadow_frame.SetVRegReference(inst->VRegA_32x(),
+                                      shadow_frame.GetVRegReference(inst->VRegB_32x()));
+        inst = inst->Next_3xx();
+        break;
+      case Instruction::MOVE_RESULT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_11x(inst_data), result_register.GetI());
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::MOVE_RESULT_WIDE:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_11x(inst_data), result_register.GetJ());
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::MOVE_RESULT_OBJECT:
+        PREAMBLE();
+        shadow_frame.SetVRegReference(inst->VRegA_11x(inst_data), result_register.GetL());
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::MOVE_EXCEPTION: {
+        PREAMBLE();
+        Throwable* exception = self->GetException(NULL);
+        self->ClearException();
+        shadow_frame.SetVRegReference(inst->VRegA_11x(inst_data), exception);
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::RETURN_VOID: {
+        PREAMBLE();
+        JValue result;
+        if (do_access_check) {
+          // If access checks are required then the dex-to-dex compiler and analysis of
+          // whether the class has final fields hasn't been performed. Conservatively
+          // perform the memory barrier now.
+          ANDROID_MEMBAR_STORE();
+        }
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
+          instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), inst->GetDexPc(insns),
+                                           result);
+        }
+        return result;
+      }
+      case Instruction::RETURN_VOID_BARRIER: {
+        PREAMBLE();
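+        // This quickened form is emitted where analysis proved the store
+        // barrier is actually needed (e.g. a constructor writing final fields).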
+        ANDROID_MEMBAR_STORE();
+        JValue result;
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
+          instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), inst->GetDexPc(insns),
+                                           result);
+        }
+        return result;
+      }
+      case Instruction::RETURN: {
+        PREAMBLE();
+        JValue result;
+        result.SetJ(0);
+        result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data)));
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
+          instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), inst->GetDexPc(insns),
+                                           result);
+        }
+        return result;
+      }
+      case Instruction::RETURN_WIDE: {
+        PREAMBLE();
+        JValue result;
+        result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data)));
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
+          instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), inst->GetDexPc(insns),
+                                           result);
+        }
+        return result;
+      }
+      case Instruction::RETURN_OBJECT: {
+        PREAMBLE();
+        JValue result;
+        if (UNLIKELY(self->TestAllFlags())) {
+          CheckSuspend(self);
+        }
+        Object* obj_result = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
+        result.SetJ(0);
+        result.SetL(obj_result);
+        if (do_assignability_check && obj_result != NULL) {
+          Class* return_type = MethodHelper(shadow_frame.GetMethod()).GetReturnType();
+          if (return_type == NULL) {
+            // Return the pending exception.
+            HANDLE_PENDING_EXCEPTION();
+          }
+          if (!obj_result->VerifierInstanceOf(return_type)) {
+            // This should never happen.
+            self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
+                                     "Ljava/lang/VirtualMachineError;",
+                                     "Returning '%s' that is not instance of return type '%s'",
+                                     ClassHelper(obj_result->GetClass()).GetDescriptor(),
+                                     ClassHelper(return_type).GetDescriptor());
+            HANDLE_PENDING_EXCEPTION();
+          }
+        }
+        if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
+          instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), inst->GetDexPc(insns),
+                                           result);
+        }
+        return result;
+      }
+      case Instruction::CONST_4: {
+        PREAMBLE();
+        uint4_t dst = inst->VRegA_11n(inst_data);
+        int4_t val = inst->VRegB_11n(inst_data);
+        shadow_frame.SetVReg(dst, val);
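+        // A zero constant may later be used as a null object reference, so
+        // clear the reference view of the register to keep both views consistent.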
+        if (val == 0) {
+          shadow_frame.SetVRegReference(dst, NULL);
+        }
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::CONST_16: {
+        PREAMBLE();
+        uint8_t dst = inst->VRegA_21s(inst_data);
+        int16_t val = inst->VRegB_21s();
+        shadow_frame.SetVReg(dst, val);
+        if (val == 0) {
+          shadow_frame.SetVRegReference(dst, NULL);
+        }
+        inst = inst->Next_2xx();
+        break;
+      }
+      case Instruction::CONST: {
+        PREAMBLE();
+        uint8_t dst = inst->VRegA_31i(inst_data);
+        int32_t val = inst->VRegB_31i();
+        shadow_frame.SetVReg(dst, val);
+        if (val == 0) {
+          shadow_frame.SetVRegReference(dst, NULL);
+        }
+        inst = inst->Next_3xx();
+        break;
+      }
+      case Instruction::CONST_HIGH16: {
+        PREAMBLE();
+        uint8_t dst = inst->VRegA_21h(inst_data);
+        int32_t val = static_cast<int32_t>(inst->VRegB_21h() << 16);
+        shadow_frame.SetVReg(dst, val);
+        if (val == 0) {
+          shadow_frame.SetVRegReference(dst, NULL);
+        }
+        inst = inst->Next_2xx();
+        break;
+      }
+      case Instruction::CONST_WIDE_16:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_21s(inst_data), inst->VRegB_21s());
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::CONST_WIDE_32:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_31i(inst_data), inst->VRegB_31i());
+        inst = inst->Next_3xx();
+        break;
+      case Instruction::CONST_WIDE:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_51l(inst_data), inst->VRegB_51l());
+        inst = inst->Next_51l();
+        break;
+      case Instruction::CONST_WIDE_HIGH16:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_21h(inst_data),
+                                 static_cast<uint64_t>(inst->VRegB_21h()) << 48);
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::CONST_STRING: {
+        PREAMBLE();
+        String* s = ResolveString(self, mh, inst->VRegB_21c());
+        if (UNLIKELY(s == NULL)) {
+          HANDLE_PENDING_EXCEPTION();
+        } else {
+          shadow_frame.SetVRegReference(inst->VRegA_21c(inst_data), s);
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::CONST_STRING_JUMBO: {
+        PREAMBLE();
+        String* s = ResolveString(self, mh, inst->VRegB_31c());
+        if (UNLIKELY(s == NULL)) {
+          HANDLE_PENDING_EXCEPTION();
+        } else {
+          shadow_frame.SetVRegReference(inst->VRegA_31c(inst_data), s);
+          inst = inst->Next_3xx();
+        }
+        break;
+      }
+      case Instruction::CONST_CLASS: {
+        PREAMBLE();
+        Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame.GetMethod(),
+                                          self, false, do_access_check);
+        if (UNLIKELY(c == NULL)) {
+          HANDLE_PENDING_EXCEPTION();
+        } else {
+          shadow_frame.SetVRegReference(inst->VRegA_21c(inst_data), c);
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::MONITOR_ENTER: {
+        PREAMBLE();
+        Object* obj = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
+        if (UNLIKELY(obj == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+        } else {
+          DoMonitorEnter(self, obj);
+          POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_1xx);
+        }
+        break;
+      }
+      case Instruction::MONITOR_EXIT: {
+        PREAMBLE();
+        Object* obj = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
+        if (UNLIKELY(obj == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+        } else {
+          DoMonitorExit(self, obj);
+          POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_1xx);
+        }
+        break;
+      }
+      case Instruction::CHECK_CAST: {
+        PREAMBLE();
+        Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame.GetMethod(),
+                                          self, false, do_access_check);
+        if (UNLIKELY(c == NULL)) {
+          HANDLE_PENDING_EXCEPTION();
+        } else {
+          Object* obj = shadow_frame.GetVRegReference(inst->VRegA_21c(inst_data));
+          if (UNLIKELY(obj != NULL && !obj->InstanceOf(c))) {
+            ThrowClassCastException(c, obj->GetClass());
+            HANDLE_PENDING_EXCEPTION();
+          } else {
+            inst = inst->Next_2xx();
+          }
+        }
+        break;
+      }
+      case Instruction::INSTANCE_OF: {
+        PREAMBLE();
+        Class* c = ResolveVerifyAndClinit(inst->VRegC_22c(), shadow_frame.GetMethod(),
+                                          self, false, do_access_check);
+        if (UNLIKELY(c == NULL)) {
+          HANDLE_PENDING_EXCEPTION();
+        } else {
+          Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+          shadow_frame.SetVReg(inst->VRegA_22c(inst_data),
+                               (obj != NULL && obj->InstanceOf(c)) ? 1 : 0);
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::ARRAY_LENGTH:  {
+        PREAMBLE();
+        Object* array = shadow_frame.GetVRegReference(inst->VRegB_12x(inst_data));
+        if (UNLIKELY(array == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+        } else {
+          shadow_frame.SetVReg(inst->VRegA_12x(inst_data), array->AsArray()->GetLength());
+          inst = inst->Next_1xx();
+        }
+        break;
+      }
+      case Instruction::NEW_INSTANCE: {
+        PREAMBLE();
+        Object* obj = AllocObjectFromCodeInstrumented(inst->VRegB_21c(), shadow_frame.GetMethod(),
+                                                      self, do_access_check);
+        if (UNLIKELY(obj == NULL)) {
+          HANDLE_PENDING_EXCEPTION();
+        } else {
+          shadow_frame.SetVRegReference(inst->VRegA_21c(inst_data), obj);
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::NEW_ARRAY: {
+        PREAMBLE();
+        int32_t length = shadow_frame.GetVReg(inst->VRegB_22c(inst_data));
+        Object* obj = AllocArrayFromCodeInstrumented(inst->VRegC_22c(), shadow_frame.GetMethod(),
+                                                     length, self, do_access_check);
+        if (UNLIKELY(obj == NULL)) {
+          HANDLE_PENDING_EXCEPTION();
+        } else {
+          shadow_frame.SetVRegReference(inst->VRegA_22c(inst_data), obj);
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::FILLED_NEW_ARRAY: {
+        PREAMBLE();
+        bool success = DoFilledNewArray<false, do_access_check>(inst, shadow_frame,
+                                                                self, &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
+        break;
+      }
+      case Instruction::FILLED_NEW_ARRAY_RANGE: {
+        PREAMBLE();
+        bool success = DoFilledNewArray<true, do_access_check>(inst, shadow_frame,
+                                                               self, &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
+        break;
+      }
+      case Instruction::FILL_ARRAY_DATA: {
+        PREAMBLE();
+        Object* obj = shadow_frame.GetVRegReference(inst->VRegA_31t(inst_data));
+        if (UNLIKELY(obj == NULL)) {
+          ThrowNullPointerException(NULL, "null array in FILL_ARRAY_DATA");
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        Array* array = obj->AsArray();
+        DCHECK(array->IsArrayInstance() && !array->IsObjectArray());
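+        // VRegB_31t is the branch offset, in 16-bit code units, from this
+        // instruction to its array-data payload.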
+        const uint16_t* payload_addr = reinterpret_cast<const uint16_t*>(inst) + inst->VRegB_31t();
+        const Instruction::ArrayDataPayload* payload =
+            reinterpret_cast<const Instruction::ArrayDataPayload*>(payload_addr);
+        if (UNLIKELY(static_cast<int32_t>(payload->element_count) > array->GetLength())) {
+          self->ThrowNewExceptionF(shadow_frame.GetCurrentLocationForThrow(),
+                                   "Ljava/lang/ArrayIndexOutOfBoundsException;",
+                                   "failed FILL_ARRAY_DATA; length=%d, index=%d",
+                                   array->GetLength(), payload->element_count);
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        uint32_t size_in_bytes = payload->element_count * payload->element_width;
+        memcpy(array->GetRawData(payload->element_width), payload->data, size_in_bytes);
+        inst = inst->Next_3xx();
+        break;
+      }
+      case Instruction::THROW: {
+        PREAMBLE();
+        Object* exception = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
+        if (UNLIKELY(exception == NULL)) {
+          ThrowNullPointerException(NULL, "throw with null exception");
+        } else if (do_assignability_check && !exception->GetClass()->IsThrowableClass()) {
+          // This should never happen.
+          self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
+                                   "Ljava/lang/VirtualMachineError;",
+                                   "Throwing '%s' that is not instance of Throwable",
+                                   ClassHelper(exception->GetClass()).GetDescriptor());
+        } else {
+          self->SetException(shadow_frame.GetCurrentLocationForThrow(), exception->AsThrowable());
+        }
+        HANDLE_PENDING_EXCEPTION();
+        break;
+      }
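+      // goto/goto-16/goto-32: unconditional branches. Backward branches double as
+      // safepoints: thread flags are tested and a suspend request is honored.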
+      case Instruction::GOTO: {
+        PREAMBLE();
+        int8_t offset = inst->VRegA_10t(inst_data);
+        if (IsBackwardBranch(offset)) {
+          if (UNLIKELY(self->TestAllFlags())) {
+            CheckSuspend(self);
+          }
+        }
+        inst = inst->RelativeAt(offset);
+        break;
+      }
+      case Instruction::GOTO_16: {
+        PREAMBLE();
+        int16_t offset = inst->VRegA_20t();
+        if (IsBackwardBranch(offset)) {
+          if (UNLIKELY(self->TestAllFlags())) {
+            CheckSuspend(self);
+          }
+        }
+        inst = inst->RelativeAt(offset);
+        break;
+      }
+      case Instruction::GOTO_32: {
+        PREAMBLE();
+        int32_t offset = inst->VRegA_30t();
+        if (IsBackwardBranch(offset)) {
+          if (UNLIKELY(self->TestAllFlags())) {
+            CheckSuspend(self);
+          }
+        }
+        inst = inst->RelativeAt(offset);
+        break;
+      }
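+      // packed-switch/sparse-switch: the Do*Switch helpers return a relative
+      // code-unit offset (the instruction width when no case matches), which is
+      // then applied like any other branch.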
+      case Instruction::PACKED_SWITCH: {
+        PREAMBLE();
+        int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
+        if (IsBackwardBranch(offset)) {
+          if (UNLIKELY(self->TestAllFlags())) {
+            CheckSuspend(self);
+          }
+        }
+        inst = inst->RelativeAt(offset);
+        break;
+      }
+      case Instruction::SPARSE_SWITCH: {
+        PREAMBLE();
+        int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
+        if (IsBackwardBranch(offset)) {
+          if (UNLIKELY(self->TestAllFlags())) {
+            CheckSuspend(self);
+          }
+        }
+        inst = inst->RelativeAt(offset);
+        break;
+      }
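+      // cmpl/cmpg: identical except for NaN. With an unordered operand every
+      // comparison below is false, so cmpl falls through to -1 and cmpg to 1.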
+      case Instruction::CMPL_FLOAT: {
+        PREAMBLE();
+        float val1 = shadow_frame.GetVRegFloat(inst->VRegB_23x());
+        float val2 = shadow_frame.GetVRegFloat(inst->VRegC_23x());
+        int32_t result;
+        if (val1 > val2) {
+          result = 1;
+        } else if (val1 == val2) {
+          result = 0;
+        } else {
+          result = -1;
+        }
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), result);
+        inst = inst->Next_2xx();
+        break;
+      }
+      case Instruction::CMPG_FLOAT: {
+        PREAMBLE();
+        float val1 = shadow_frame.GetVRegFloat(inst->VRegB_23x());
+        float val2 = shadow_frame.GetVRegFloat(inst->VRegC_23x());
+        int32_t result;
+        if (val1 < val2) {
+          result = -1;
+        } else if (val1 == val2) {
+          result = 0;
+        } else {
+          result = 1;
+        }
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), result);
+        inst = inst->Next_2xx();
+        break;
+      }
+      case Instruction::CMPL_DOUBLE: {
+        PREAMBLE();
+        double val1 = shadow_frame.GetVRegDouble(inst->VRegB_23x());
+        double val2 = shadow_frame.GetVRegDouble(inst->VRegC_23x());
+        int32_t result;
+        if (val1 > val2) {
+          result = 1;
+        } else if (val1 == val2) {
+          result = 0;
+        } else {
+          result = -1;
+        }
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), result);
+        inst = inst->Next_2xx();
+        break;
+      }
+      case Instruction::CMPG_DOUBLE: {
+        PREAMBLE();
+        double val1 = shadow_frame.GetVRegDouble(inst->VRegB_23x());
+        double val2 = shadow_frame.GetVRegDouble(inst->VRegC_23x());
+        int32_t result;
+        if (val1 < val2) {
+          result = -1;
+        } else if (val1 == val2) {
+          result = 0;
+        } else {
+          result = 1;
+        }
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), result);
+        inst = inst->Next_2xx();
+        break;
+      }
+      case Instruction::CMP_LONG: {
+        PREAMBLE();
+        int64_t val1 = shadow_frame.GetVRegLong(inst->VRegB_23x());
+        int64_t val2 = shadow_frame.GetVRegLong(inst->VRegC_23x());
+        int32_t result;
+        if (val1 > val2) {
+          result = 1;
+        } else if (val1 == val2) {
+          result = 0;
+        } else {
+          result = -1;
+        }
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), result);
+        inst = inst->Next_2xx();
+        break;
+      }
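+      // if-<cond> vA, vB: compare two int registers and branch. As with goto,
+      // a backward branch is a suspension point.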
+      case Instruction::IF_EQ: {
+        PREAMBLE();
+        if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) == shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
+          int16_t offset = inst->VRegC_22t();
+          if (IsBackwardBranch(offset)) {
+            if (UNLIKELY(self->TestAllFlags())) {
+              CheckSuspend(self);
+            }
+          }
+          inst = inst->RelativeAt(offset);
+        } else {
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::IF_NE: {
+        PREAMBLE();
+        if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) != shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
+          int16_t offset = inst->VRegC_22t();
+          if (IsBackwardBranch(offset)) {
+            if (UNLIKELY(self->TestAllFlags())) {
+              CheckSuspend(self);
+            }
+          }
+          inst = inst->RelativeAt(offset);
+        } else {
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::IF_LT: {
+        PREAMBLE();
+        if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) < shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
+          int16_t offset = inst->VRegC_22t();
+          if (IsBackwardBranch(offset)) {
+            if (UNLIKELY(self->TestAllFlags())) {
+              CheckSuspend(self);
+            }
+          }
+          inst = inst->RelativeAt(offset);
+        } else {
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::IF_GE: {
+        PREAMBLE();
+        if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >= shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
+          int16_t offset = inst->VRegC_22t();
+          if (IsBackwardBranch(offset)) {
+            if (UNLIKELY(self->TestAllFlags())) {
+              CheckSuspend(self);
+            }
+          }
+          inst = inst->RelativeAt(offset);
+        } else {
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::IF_GT: {
+        PREAMBLE();
+        if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) > shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
+          int16_t offset = inst->VRegC_22t();
+          if (IsBackwardBranch(offset)) {
+            if (UNLIKELY(self->TestAllFlags())) {
+              CheckSuspend(self);
+            }
+          }
+          inst = inst->RelativeAt(offset);
+        } else {
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::IF_LE: {
+        PREAMBLE();
+        if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <= shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
+          int16_t offset = inst->VRegC_22t();
+          if (IsBackwardBranch(offset)) {
+            if (UNLIKELY(self->TestAllFlags())) {
+              CheckSuspend(self);
+            }
+          }
+          inst = inst->RelativeAt(offset);
+        } else {
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
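+      // if-<cond>z vA: single-register variants comparing against zero.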
+      case Instruction::IF_EQZ: {
+        PREAMBLE();
+        if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
+          int16_t offset = inst->VRegB_21t();
+          if (IsBackwardBranch(offset)) {
+            if (UNLIKELY(self->TestAllFlags())) {
+              CheckSuspend(self);
+            }
+          }
+          inst = inst->RelativeAt(offset);
+        } else {
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::IF_NEZ: {
+        PREAMBLE();
+        if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
+          int16_t offset = inst->VRegB_21t();
+          if (IsBackwardBranch(offset)) {
+            if (UNLIKELY(self->TestAllFlags())) {
+              CheckSuspend(self);
+            }
+          }
+          inst = inst->RelativeAt(offset);
+        } else {
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::IF_LTZ: {
+        PREAMBLE();
+        if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
+          int16_t offset = inst->VRegB_21t();
+          if (IsBackwardBranch(offset)) {
+            if (UNLIKELY(self->TestAllFlags())) {
+              CheckSuspend(self);
+            }
+          }
+          inst = inst->RelativeAt(offset);
+        } else {
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::IF_GEZ: {
+        PREAMBLE();
+        if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
+          int16_t offset = inst->VRegB_21t();
+          if (IsBackwardBranch(offset)) {
+            if (UNLIKELY(self->TestAllFlags())) {
+              CheckSuspend(self);
+            }
+          }
+          inst = inst->RelativeAt(offset);
+        } else {
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::IF_GTZ: {
+        PREAMBLE();
+        if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
+          int16_t offset = inst->VRegB_21t();
+          if (IsBackwardBranch(offset)) {
+            if (UNLIKELY(self->TestAllFlags())) {
+              CheckSuspend(self);
+            }
+          }
+          inst = inst->RelativeAt(offset);
+        } else {
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::IF_LEZ: {
+        PREAMBLE();
+        if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
+          int16_t offset = inst->VRegB_21t();
+          if (IsBackwardBranch(offset)) {
+            if (UNLIKELY(self->TestAllFlags())) {
+              CheckSuspend(self);
+            }
+          }
+          inst = inst->RelativeAt(offset);
+        } else {
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
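+      // aget-*: array loads. A null array raises NullPointerException; on a bad
+      // index, IsValidIndex is expected to leave the bounds exception pending.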
+      case Instruction::AGET_BOOLEAN: {
+        PREAMBLE();
+        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        if (UNLIKELY(a == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+        BooleanArray* array = a->AsBooleanArray();
+        if (LIKELY(array->IsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+          inst = inst->Next_2xx();
+        } else {
+          HANDLE_PENDING_EXCEPTION();
+        }
+        break;
+      }
+      case Instruction::AGET_BYTE: {
+        PREAMBLE();
+        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        if (UNLIKELY(a == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+        ByteArray* array = a->AsByteArray();
+        if (LIKELY(array->IsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+          inst = inst->Next_2xx();
+        } else {
+          HANDLE_PENDING_EXCEPTION();
+        }
+        break;
+      }
+      case Instruction::AGET_CHAR: {
+        PREAMBLE();
+        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        if (UNLIKELY(a == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+        CharArray* array = a->AsCharArray();
+        if (LIKELY(array->IsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+          inst = inst->Next_2xx();
+        } else {
+          HANDLE_PENDING_EXCEPTION();
+        }
+        break;
+      }
+      case Instruction::AGET_SHORT: {
+        PREAMBLE();
+        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        if (UNLIKELY(a == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+        ShortArray* array = a->AsShortArray();
+        if (LIKELY(array->IsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+          inst = inst->Next_2xx();
+        } else {
+          HANDLE_PENDING_EXCEPTION();
+        }
+        break;
+      }
+      case Instruction::AGET: {
+        PREAMBLE();
+        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        if (UNLIKELY(a == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+        IntArray* array = a->AsIntArray();
+        if (LIKELY(array->IsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+          inst = inst->Next_2xx();
+        } else {
+          HANDLE_PENDING_EXCEPTION();
+        }
+        break;
+      }
+      case Instruction::AGET_WIDE: {
+        PREAMBLE();
+        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        if (UNLIKELY(a == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+        LongArray* array = a->AsLongArray();
+        if (LIKELY(array->IsValidIndex(index))) {
+          shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data), array->GetData()[index]);
+          inst = inst->Next_2xx();
+        } else {
+          HANDLE_PENDING_EXCEPTION();
+        }
+        break;
+      }
+      case Instruction::AGET_OBJECT: {
+        PREAMBLE();
+        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        if (UNLIKELY(a == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+        ObjectArray<Object>* array = a->AsObjectArray<Object>();
+        if (LIKELY(array->IsValidIndex(index))) {
+          shadow_frame.SetVRegReference(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
+          inst = inst->Next_2xx();
+        } else {
+          HANDLE_PENDING_EXCEPTION();
+        }
+        break;
+      }
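+      // aput-*: array stores, mirroring the aget checks; aput-object additionally
+      // verifies that the stored reference is assignable to the component type.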
+      case Instruction::APUT_BOOLEAN: {
+        PREAMBLE();
+        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        if (UNLIKELY(a == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        uint8_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
+        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+        BooleanArray* array = a->AsBooleanArray();
+        if (LIKELY(array->IsValidIndex(index))) {
+          array->GetData()[index] = val;
+          inst = inst->Next_2xx();
+        } else {
+          HANDLE_PENDING_EXCEPTION();
+        }
+        break;
+      }
+      case Instruction::APUT_BYTE: {
+        PREAMBLE();
+        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        if (UNLIKELY(a == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        int8_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
+        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+        ByteArray* array = a->AsByteArray();
+        if (LIKELY(array->IsValidIndex(index))) {
+          array->GetData()[index] = val;
+          inst = inst->Next_2xx();
+        } else {
+          HANDLE_PENDING_EXCEPTION();
+        }
+        break;
+      }
+      case Instruction::APUT_CHAR: {
+        PREAMBLE();
+        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        if (UNLIKELY(a == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        uint16_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
+        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+        CharArray* array = a->AsCharArray();
+        if (LIKELY(array->IsValidIndex(index))) {
+          array->GetData()[index] = val;
+          inst = inst->Next_2xx();
+        } else {
+          HANDLE_PENDING_EXCEPTION();
+        }
+        break;
+      }
+      case Instruction::APUT_SHORT: {
+        PREAMBLE();
+        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        if (UNLIKELY(a == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        int16_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
+        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+        ShortArray* array = a->AsShortArray();
+        if (LIKELY(array->IsValidIndex(index))) {
+          array->GetData()[index] = val;
+          inst = inst->Next_2xx();
+        } else {
+          HANDLE_PENDING_EXCEPTION();
+        }
+        break;
+      }
+      case Instruction::APUT: {
+        PREAMBLE();
+        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        if (UNLIKELY(a == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        int32_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
+        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+        IntArray* array = a->AsIntArray();
+        if (LIKELY(array->IsValidIndex(index))) {
+          array->GetData()[index] = val;
+          inst = inst->Next_2xx();
+        } else {
+          HANDLE_PENDING_EXCEPTION();
+        }
+        break;
+      }
+      case Instruction::APUT_WIDE: {
+        PREAMBLE();
+        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        if (UNLIKELY(a == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        int64_t val = shadow_frame.GetVRegLong(inst->VRegA_23x(inst_data));
+        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+        LongArray* array = a->AsLongArray();
+        if (LIKELY(array->IsValidIndex(index))) {
+          array->GetData()[index] = val;
+          inst = inst->Next_2xx();
+        } else {
+          HANDLE_PENDING_EXCEPTION();
+        }
+        break;
+      }
+      case Instruction::APUT_OBJECT: {
+        PREAMBLE();
+        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        if (UNLIKELY(a == NULL)) {
+          ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+          HANDLE_PENDING_EXCEPTION();
+          break;
+        }
+        int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
+        Object* val = shadow_frame.GetVRegReference(inst->VRegA_23x(inst_data));
+        ObjectArray<Object>* array = a->AsObjectArray<Object>();
+        if (LIKELY(array->IsValidIndex(index) && array->CheckAssignable(val))) {
+          array->SetWithoutChecks(index, val);
+          inst = inst->Next_2xx();
+        } else {
+          HANDLE_PENDING_EXCEPTION();
+        }
+        break;
+      }
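+      // iget-*/sget-*: field loads dispatched through the DoFieldGet template,
+      // parameterized on access kind, primitive type and access checking.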
+      case Instruction::IGET_BOOLEAN: {
+        PREAMBLE();
+        bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimBoolean, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IGET_BYTE: {
+        PREAMBLE();
+        bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimByte, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IGET_CHAR: {
+        PREAMBLE();
+        bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimChar, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IGET_SHORT: {
+        PREAMBLE();
+        bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimShort, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IGET: {
+        PREAMBLE();
+        bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimInt, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IGET_WIDE: {
+        PREAMBLE();
+        bool success = DoFieldGet<InstancePrimitiveRead, Primitive::kPrimLong, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IGET_OBJECT: {
+        PREAMBLE();
+        bool success = DoFieldGet<InstanceObjectRead, Primitive::kPrimNot, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
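+      // *_quick opcodes assume a field offset pre-resolved by dex-to-dex
+      // quickening, so no resolution or access checks are repeated here.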
+      case Instruction::IGET_QUICK: {
+        PREAMBLE();
+        bool success = DoIGetQuick<Primitive::kPrimInt>(shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IGET_WIDE_QUICK: {
+        PREAMBLE();
+        bool success = DoIGetQuick<Primitive::kPrimLong>(shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IGET_OBJECT_QUICK: {
+        PREAMBLE();
+        bool success = DoIGetQuick<Primitive::kPrimNot>(shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SGET_BOOLEAN: {
+        PREAMBLE();
+        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimBoolean, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SGET_BYTE: {
+        PREAMBLE();
+        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimByte, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SGET_CHAR: {
+        PREAMBLE();
+        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimChar, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SGET_SHORT: {
+        PREAMBLE();
+        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimShort, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SGET: {
+        PREAMBLE();
+        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimInt, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SGET_WIDE: {
+        PREAMBLE();
+        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimLong, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SGET_OBJECT: {
+        PREAMBLE();
+        bool success = DoFieldGet<StaticObjectRead, Primitive::kPrimNot, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IPUT_BOOLEAN: {
+        PREAMBLE();
+        bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimBoolean, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IPUT_BYTE: {
+        PREAMBLE();
+        bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimByte, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IPUT_CHAR: {
+        PREAMBLE();
+        bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimChar, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IPUT_SHORT: {
+        PREAMBLE();
+        bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimShort, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IPUT: {
+        PREAMBLE();
+        bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimInt, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IPUT_WIDE: {
+        PREAMBLE();
+        bool success = DoFieldPut<InstancePrimitiveWrite, Primitive::kPrimLong, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IPUT_OBJECT: {
+        PREAMBLE();
+        bool success = DoFieldPut<InstanceObjectWrite, Primitive::kPrimNot, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IPUT_QUICK: {
+        PREAMBLE();
+        bool success = DoIPutQuick<Primitive::kPrimInt>(shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IPUT_WIDE_QUICK: {
+        PREAMBLE();
+        bool success = DoIPutQuick<Primitive::kPrimLong>(shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::IPUT_OBJECT_QUICK: {
+        PREAMBLE();
+        bool success = DoIPutQuick<Primitive::kPrimNot>(shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SPUT_BOOLEAN: {
+        PREAMBLE();
+        bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimBoolean, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SPUT_BYTE: {
+        PREAMBLE();
+        bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimByte, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SPUT_CHAR: {
+        PREAMBLE();
+        bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimChar, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SPUT_SHORT: {
+        PREAMBLE();
+        bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimShort, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SPUT: {
+        PREAMBLE();
+        bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimInt, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SPUT_WIDE: {
+        PREAMBLE();
+        bool success = DoFieldPut<StaticPrimitiveWrite, Primitive::kPrimLong, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SPUT_OBJECT: {
+        PREAMBLE();
+        bool success = DoFieldPut<StaticObjectWrite, Primitive::kPrimNot, do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
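+      // invoke-*: method calls through the DoInvoke template; any return value is
+      // captured in result_register for a following move-result.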
+      case Instruction::INVOKE_VIRTUAL: {
+        PREAMBLE();
+        bool success = DoInvoke<kVirtual, false, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
+        break;
+      }
+      case Instruction::INVOKE_VIRTUAL_RANGE: {
+        PREAMBLE();
+        bool success = DoInvoke<kVirtual, true, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
+        break;
+      }
+      case Instruction::INVOKE_SUPER: {
+        PREAMBLE();
+        bool success = DoInvoke<kSuper, false, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
+        break;
+      }
+      case Instruction::INVOKE_SUPER_RANGE: {
+        PREAMBLE();
+        bool success = DoInvoke<kSuper, true, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
+        break;
+      }
+      case Instruction::INVOKE_DIRECT: {
+        PREAMBLE();
+        bool success = DoInvoke<kDirect, false, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
+        break;
+      }
+      case Instruction::INVOKE_DIRECT_RANGE: {
+        PREAMBLE();
+        bool success = DoInvoke<kDirect, true, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
+        break;
+      }
+      case Instruction::INVOKE_INTERFACE: {
+        PREAMBLE();
+        bool success = DoInvoke<kInterface, false, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
+        break;
+      }
+      case Instruction::INVOKE_INTERFACE_RANGE: {
+        PREAMBLE();
+        bool success = DoInvoke<kInterface, true, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
+        break;
+      }
+      case Instruction::INVOKE_STATIC: {
+        PREAMBLE();
+        bool success = DoInvoke<kStatic, false, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
+        break;
+      }
+      case Instruction::INVOKE_STATIC_RANGE: {
+        PREAMBLE();
+        bool success = DoInvoke<kStatic, true, do_access_check>(self, shadow_frame, inst, inst_data, &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
+        break;
+      }
+      case Instruction::INVOKE_VIRTUAL_QUICK: {
+        PREAMBLE();
+        bool success = DoInvokeVirtualQuick<false>(self, shadow_frame, inst, inst_data, &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
+        break;
+      }
+      case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
+        PREAMBLE();
+        bool success = DoInvokeVirtualQuick<true>(self, shadow_frame, inst, inst_data, &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_3xx);
+        break;
+      }
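+      // Unary ops and primitive conversions: 12x format, vA = op(vB).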
+      case Instruction::NEG_INT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_12x(inst_data), -shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::NOT_INT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_12x(inst_data), ~shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::NEG_LONG:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_12x(inst_data), -shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::NOT_LONG:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_12x(inst_data), ~shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::NEG_FLOAT:
+        PREAMBLE();
+        shadow_frame.SetVRegFloat(inst->VRegA_12x(inst_data), -shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::NEG_DOUBLE:
+        PREAMBLE();
+        shadow_frame.SetVRegDouble(inst->VRegA_12x(inst_data), -shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::INT_TO_LONG:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_12x(inst_data),
+                                 shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::INT_TO_FLOAT:
+        PREAMBLE();
+        shadow_frame.SetVRegFloat(inst->VRegA_12x(inst_data),
+                                  shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::INT_TO_DOUBLE:
+        PREAMBLE();
+        shadow_frame.SetVRegDouble(inst->VRegA_12x(inst_data),
+                                   shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::LONG_TO_INT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_12x(inst_data),
+                             shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::LONG_TO_FLOAT:
+        PREAMBLE();
+        shadow_frame.SetVRegFloat(inst->VRegA_12x(inst_data),
+                                  shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::LONG_TO_DOUBLE:
+        PREAMBLE();
+        shadow_frame.SetVRegDouble(inst->VRegA_12x(inst_data),
+                                   shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
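+      // Floating-point to integral conversions follow Java semantics: NaN maps to
+      // 0 and out-of-range values saturate (art_float_to_integral).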
+      case Instruction::FLOAT_TO_INT: {
+        PREAMBLE();
+        float val = shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data));
+        int32_t result = art_float_to_integral<int32_t, float>(val);
+        shadow_frame.SetVReg(inst->VRegA_12x(inst_data), result);
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::FLOAT_TO_LONG: {
+        PREAMBLE();
+        float val = shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data));
+        int64_t result = art_float_to_integral<int64_t, float>(val);
+        shadow_frame.SetVRegLong(inst->VRegA_12x(inst_data), result);
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::FLOAT_TO_DOUBLE:
+        PREAMBLE();
+        shadow_frame.SetVRegDouble(inst->VRegA_12x(inst_data),
+                                   shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::DOUBLE_TO_INT: {
+        PREAMBLE();
+        double val = shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data));
+        int32_t result = art_float_to_integral<int32_t, double>(val);
+        shadow_frame.SetVReg(inst->VRegA_12x(inst_data), result);
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::DOUBLE_TO_LONG: {
+        PREAMBLE();
+        double val = shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data));
+        int64_t result = art_float_to_integral<int64_t, double>(val);
+        shadow_frame.SetVRegLong(inst->VRegA_12x(inst_data), result);
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::DOUBLE_TO_FLOAT:
+        PREAMBLE();
+        shadow_frame.SetVRegFloat(inst->VRegA_12x(inst_data),
+                                  shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::INT_TO_BYTE:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_12x(inst_data),
+                             static_cast<int8_t>(shadow_frame.GetVReg(inst->VRegB_12x(inst_data))));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::INT_TO_CHAR:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_12x(inst_data),
+                             static_cast<uint16_t>(shadow_frame.GetVReg(inst->VRegB_12x(inst_data))));
+        inst = inst->Next_1xx();
+        break;
+      case Instruction::INT_TO_SHORT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_12x(inst_data),
+                             static_cast<int16_t>(shadow_frame.GetVReg(inst->VRegB_12x(inst_data))));
+        inst = inst->Next_1xx();
+        break;
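+      // Binary 23x ops: vA = vB op vC. Shift distances are masked to the low five
+      // bits for ints and six bits for longs, matching Java semantics.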
+      case Instruction::ADD_INT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_23x()) +
+                             shadow_frame.GetVReg(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::SUB_INT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_23x()) -
+                             shadow_frame.GetVReg(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::MUL_INT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_23x()) *
+                             shadow_frame.GetVReg(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::DIV_INT: {
+        PREAMBLE();
+        bool success = DoIntDivide(shadow_frame, inst->VRegA_23x(inst_data),
+                                   shadow_frame.GetVReg(inst->VRegB_23x()),
+                                   shadow_frame.GetVReg(inst->VRegC_23x()));
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::REM_INT: {
+        PREAMBLE();
+        bool success = DoIntRemainder(shadow_frame, inst->VRegA_23x(inst_data),
+                                      shadow_frame.GetVReg(inst->VRegB_23x()),
+                                      shadow_frame.GetVReg(inst->VRegC_23x()));
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::SHL_INT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_23x()) <<
+                             (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x1f));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::SHR_INT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_23x()) >>
+                             (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x1f));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::USHR_INT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                             static_cast<uint32_t>(shadow_frame.GetVReg(inst->VRegB_23x())) >>
+                             (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x1f));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::AND_INT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_23x()) &
+                             shadow_frame.GetVReg(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::OR_INT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_23x()) |
+                             shadow_frame.GetVReg(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::XOR_INT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_23x()) ^
+                             shadow_frame.GetVReg(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::ADD_LONG:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) +
+                                 shadow_frame.GetVRegLong(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::SUB_LONG:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) -
+                                 shadow_frame.GetVRegLong(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::MUL_LONG:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) *
+                                 shadow_frame.GetVRegLong(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::DIV_LONG:
+        PREAMBLE();
+        DoLongDivide(shadow_frame, inst->VRegA_23x(inst_data),
+                     shadow_frame.GetVRegLong(inst->VRegB_23x()),
+                     shadow_frame.GetVRegLong(inst->VRegC_23x()));
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_2xx);
+        break;
+      case Instruction::REM_LONG:
+        PREAMBLE();
+        DoLongRemainder(shadow_frame, inst->VRegA_23x(inst_data),
+                        shadow_frame.GetVRegLong(inst->VRegB_23x()),
+                        shadow_frame.GetVRegLong(inst->VRegC_23x()));
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_2xx);
+        break;
+      case Instruction::AND_LONG:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) &
+                                 shadow_frame.GetVRegLong(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::OR_LONG:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) |
+                                 shadow_frame.GetVRegLong(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::XOR_LONG:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) ^
+                                 shadow_frame.GetVRegLong(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::SHL_LONG:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) <<
+                                 (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x3f));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::SHR_LONG:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                                 shadow_frame.GetVRegLong(inst->VRegB_23x()) >>
+                                 (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x3f));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::USHR_LONG:
+        PREAMBLE();
+        shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data),
+                                 static_cast<uint64_t>(shadow_frame.GetVRegLong(inst->VRegB_23x())) >>
+                                 (shadow_frame.GetVReg(inst->VRegC_23x()) & 0x3f));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::ADD_FLOAT:
+        PREAMBLE();
+        shadow_frame.SetVRegFloat(inst->VRegA_23x(inst_data),
+                                  shadow_frame.GetVRegFloat(inst->VRegB_23x()) +
+                                  shadow_frame.GetVRegFloat(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::SUB_FLOAT:
+        PREAMBLE();
+        shadow_frame.SetVRegFloat(inst->VRegA_23x(inst_data),
+                                  shadow_frame.GetVRegFloat(inst->VRegB_23x()) -
+                                  shadow_frame.GetVRegFloat(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::MUL_FLOAT:
+        PREAMBLE();
+        shadow_frame.SetVRegFloat(inst->VRegA_23x(inst_data),
+                                  shadow_frame.GetVRegFloat(inst->VRegB_23x()) *
+                                  shadow_frame.GetVRegFloat(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::DIV_FLOAT:
+        PREAMBLE();
+        shadow_frame.SetVRegFloat(inst->VRegA_23x(inst_data),
+                                  shadow_frame.GetVRegFloat(inst->VRegB_23x()) /
+                                  shadow_frame.GetVRegFloat(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::REM_FLOAT:
+        PREAMBLE();
+        shadow_frame.SetVRegFloat(inst->VRegA_23x(inst_data),
+                                  fmodf(shadow_frame.GetVRegFloat(inst->VRegB_23x()),
+                                        shadow_frame.GetVRegFloat(inst->VRegC_23x())));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::ADD_DOUBLE:
+        PREAMBLE();
+        shadow_frame.SetVRegDouble(inst->VRegA_23x(inst_data),
+                                   shadow_frame.GetVRegDouble(inst->VRegB_23x()) +
+                                   shadow_frame.GetVRegDouble(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::SUB_DOUBLE:
+        PREAMBLE();
+        shadow_frame.SetVRegDouble(inst->VRegA_23x(inst_data),
+                                   shadow_frame.GetVRegDouble(inst->VRegB_23x()) -
+                                   shadow_frame.GetVRegDouble(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::MUL_DOUBLE:
+        PREAMBLE();
+        shadow_frame.SetVRegDouble(inst->VRegA_23x(inst_data),
+                                   shadow_frame.GetVRegDouble(inst->VRegB_23x()) *
+                                   shadow_frame.GetVRegDouble(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::DIV_DOUBLE:
+        PREAMBLE();
+        shadow_frame.SetVRegDouble(inst->VRegA_23x(inst_data),
+                                   shadow_frame.GetVRegDouble(inst->VRegB_23x()) /
+                                   shadow_frame.GetVRegDouble(inst->VRegC_23x()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::REM_DOUBLE:
+        PREAMBLE();
+        shadow_frame.SetVRegDouble(inst->VRegA_23x(inst_data),
+                                   fmod(shadow_frame.GetVRegDouble(inst->VRegB_23x()),
+                                        shadow_frame.GetVRegDouble(inst->VRegC_23x())));
+        inst = inst->Next_2xx();
+        break;
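+      // *-2addr variants: vA = vA op vB; the A register doubles as source and
+      // destination.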
+      case Instruction::ADD_INT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVReg(vregA,
+                             shadow_frame.GetVReg(vregA) +
+                             shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::SUB_INT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVReg(vregA,
+                             shadow_frame.GetVReg(vregA) -
+                             shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::MUL_INT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVReg(vregA,
+                             shadow_frame.GetVReg(vregA) *
+                             shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::DIV_INT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        bool success = DoIntDivide(shadow_frame, vregA, shadow_frame.GetVReg(vregA),
+                                   shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_1xx);
+        break;
+      }
+      case Instruction::REM_INT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        bool success = DoIntRemainder(shadow_frame, vregA, shadow_frame.GetVReg(vregA),
+                                      shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_1xx);
+        break;
+      }
+      case Instruction::SHL_INT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVReg(vregA,
+                             shadow_frame.GetVReg(vregA) <<
+                             (shadow_frame.GetVReg(inst->VRegB_12x(inst_data)) & 0x1f));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::SHR_INT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVReg(vregA,
+                             shadow_frame.GetVReg(vregA) >>
+                             (shadow_frame.GetVReg(inst->VRegB_12x(inst_data)) & 0x1f));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::USHR_INT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVReg(vregA,
+                             static_cast<uint32_t>(shadow_frame.GetVReg(vregA)) >>
+                             (shadow_frame.GetVReg(inst->VRegB_12x(inst_data)) & 0x1f));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::AND_INT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVReg(vregA,
+                             shadow_frame.GetVReg(vregA) &
+                             shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::OR_INT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVReg(vregA,
+                             shadow_frame.GetVReg(vregA) |
+                             shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::XOR_INT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVReg(vregA,
+                             shadow_frame.GetVReg(vregA) ^
+                             shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::ADD_LONG_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegLong(vregA,
+                                 shadow_frame.GetVRegLong(vregA) +
+                                 shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::SUB_LONG_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegLong(vregA,
+                                 shadow_frame.GetVRegLong(vregA) -
+                                 shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::MUL_LONG_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegLong(vregA,
+                                 shadow_frame.GetVRegLong(vregA) *
+                                 shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::DIV_LONG_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        DoLongDivide(shadow_frame, vregA, shadow_frame.GetVRegLong(vregA),
+                     shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_1xx);
+        break;
+      }
+      case Instruction::REM_LONG_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        DoLongRemainder(shadow_frame, vregA, shadow_frame.GetVRegLong(vregA),
+                        shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_1xx);
+        break;
+      }
+      case Instruction::AND_LONG_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegLong(vregA,
+                                 shadow_frame.GetVRegLong(vregA) &
+                                 shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::OR_LONG_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegLong(vregA,
+                                 shadow_frame.GetVRegLong(vregA) |
+                                 shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::XOR_LONG_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegLong(vregA,
+                                 shadow_frame.GetVRegLong(vregA) ^
+                                 shadow_frame.GetVRegLong(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::SHL_LONG_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegLong(vregA,
+                                 shadow_frame.GetVRegLong(vregA) <<
+                                 (shadow_frame.GetVReg(inst->VRegB_12x(inst_data)) & 0x3f));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::SHR_LONG_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegLong(vregA,
+                                 shadow_frame.GetVRegLong(vregA) >>
+                                 (shadow_frame.GetVReg(inst->VRegB_12x(inst_data)) & 0x3f));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::USHR_LONG_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegLong(vregA,
+                                 static_cast<uint64_t>(shadow_frame.GetVRegLong(vregA)) >>
+                                 (shadow_frame.GetVReg(inst->VRegB_12x(inst_data)) & 0x3f));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::ADD_FLOAT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegFloat(vregA,
+                                  shadow_frame.GetVRegFloat(vregA) +
+                                  shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::SUB_FLOAT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegFloat(vregA,
+                                  shadow_frame.GetVRegFloat(vregA) -
+                                  shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::MUL_FLOAT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegFloat(vregA,
+                                  shadow_frame.GetVRegFloat(vregA) *
+                                  shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::DIV_FLOAT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegFloat(vregA,
+                                  shadow_frame.GetVRegFloat(vregA) /
+                                  shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::REM_FLOAT_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegFloat(vregA,
+                                  fmodf(shadow_frame.GetVRegFloat(vregA),
+                                        shadow_frame.GetVRegFloat(inst->VRegB_12x(inst_data))));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::ADD_DOUBLE_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegDouble(vregA,
+                                   shadow_frame.GetVRegDouble(vregA) +
+                                   shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::SUB_DOUBLE_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegDouble(vregA,
+                                   shadow_frame.GetVRegDouble(vregA) -
+                                   shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::MUL_DOUBLE_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegDouble(vregA,
+                                   shadow_frame.GetVRegDouble(vregA) *
+                                   shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::DIV_DOUBLE_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegDouble(vregA,
+                                   shadow_frame.GetVRegDouble(vregA) /
+                                   shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data)));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::REM_DOUBLE_2ADDR: {
+        PREAMBLE();
+        uint4_t vregA = inst->VRegA_12x(inst_data);
+        shadow_frame.SetVRegDouble(vregA,
+                                   fmod(shadow_frame.GetVRegDouble(vregA),
+                                        shadow_frame.GetVRegDouble(inst->VRegB_12x(inst_data))));
+        inst = inst->Next_1xx();
+        break;
+      }
+      case Instruction::ADD_INT_LIT16:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22s(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_22s(inst_data)) +
+                             inst->VRegC_22s());
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::RSUB_INT:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22s(inst_data),
+                             inst->VRegC_22s() -
+                             shadow_frame.GetVReg(inst->VRegB_22s(inst_data)));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::MUL_INT_LIT16:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22s(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_22s(inst_data)) *
+                             inst->VRegC_22s());
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::DIV_INT_LIT16: {
+        PREAMBLE();
+        bool success = DoIntDivide(shadow_frame, inst->VRegA_22s(inst_data),
+                                   shadow_frame.GetVReg(inst->VRegB_22s(inst_data)), inst->VRegC_22s());
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::REM_INT_LIT16: {
+        PREAMBLE();
+        bool success = DoIntRemainder(shadow_frame, inst->VRegA_22s(inst_data),
+                                      shadow_frame.GetVReg(inst->VRegB_22s(inst_data)), inst->VRegC_22s());
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::AND_INT_LIT16:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22s(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_22s(inst_data)) &
+                             inst->VRegC_22s());
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::OR_INT_LIT16:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22s(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_22s(inst_data)) |
+                             inst->VRegC_22s());
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::XOR_INT_LIT16:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22s(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_22s(inst_data)) ^
+                             inst->VRegC_22s());
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::ADD_INT_LIT8:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_22b()) +
+                             inst->VRegC_22b());
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::RSUB_INT_LIT8:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                             inst->VRegC_22b() -
+                             shadow_frame.GetVReg(inst->VRegB_22b()));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::MUL_INT_LIT8:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_22b()) *
+                             inst->VRegC_22b());
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::DIV_INT_LIT8: {
+        PREAMBLE();
+        bool success = DoIntDivide(shadow_frame, inst->VRegA_22b(inst_data),
+                                   shadow_frame.GetVReg(inst->VRegB_22b()), inst->VRegC_22b());
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::REM_INT_LIT8: {
+        PREAMBLE();
+        bool success = DoIntRemainder(shadow_frame, inst->VRegA_22b(inst_data),
+                                      shadow_frame.GetVReg(inst->VRegB_22b()), inst->VRegC_22b());
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::AND_INT_LIT8:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_22b()) &
+                             inst->VRegC_22b());
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::OR_INT_LIT8:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_22b()) |
+                             inst->VRegC_22b());
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::XOR_INT_LIT8:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_22b()) ^
+                             inst->VRegC_22b());
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::SHL_INT_LIT8:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_22b()) <<
+                             (inst->VRegC_22b() & 0x1f));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::SHR_INT_LIT8:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                             shadow_frame.GetVReg(inst->VRegB_22b()) >>
+                             (inst->VRegC_22b() & 0x1f));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::USHR_INT_LIT8:
+        PREAMBLE();
+        shadow_frame.SetVReg(inst->VRegA_22b(inst_data),
+                             static_cast<uint32_t>(shadow_frame.GetVReg(inst->VRegB_22b())) >>
+                             (inst->VRegC_22b() & 0x1f));
+        inst = inst->Next_2xx();
+        break;
+      case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
+      case Instruction::UNUSED_EB ... Instruction::UNUSED_FF:
+      case Instruction::UNUSED_79:
+      case Instruction::UNUSED_7A:
+        UnexpectedOpcode(inst, mh);
+    }
+  }
+}  // NOLINT(readability/fn_size)
+
+// Explicit definitions of ExecuteSwitchImpl.
+template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) HOT_ATTR
+JValue ExecuteSwitchImpl<true>(Thread* self, MethodHelper& mh,
+                               const DexFile::CodeItem* code_item,
+                               ShadowFrame& shadow_frame, JValue result_register);
+template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) HOT_ATTR
+JValue ExecuteSwitchImpl<false>(Thread* self, MethodHelper& mh,
+                                const DexFile::CodeItem* code_item,
+                                ShadowFrame& shadow_frame, JValue result_register);
+
+}  // namespace interpreter
+}  // namespace art
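
The shift handlers above encode the Dex rule that shift distances are masked to the operand width: long shifts keep only the low six bits of the shift value (& 0x3f) and int shifts only the low five (& 0x1f). A standalone sketch of that rule (toy functions, not part of the patch):

  #include <cassert>
  #include <cstdint>

  // Matches SHL_LONG_2ADDR: only the low 6 bits of the shift count are used.
  int64_t ShlLong(int64_t value, int32_t shift) {
    return value << (shift & 0x3f);
  }

  // Matches USHR_INT_LIT8: logical shift via an unsigned cast, low 5 bits of count.
  int32_t UshrInt(int32_t value, int32_t shift) {
    return static_cast<uint32_t>(value) >> (shift & 0x1f);
  }

  int main() {
    assert(ShlLong(1, 65) == 2);     // 65 & 0x3f == 1, so this shifts by one.
    assert(UshrInt(-1, 28) == 0xf);  // The sign bit is not propagated.
    return 0;
  }
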
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index a2efc48..523d892 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -47,7 +47,7 @@
 
 std::string DescribeRefTypeId(const RefTypeId& ref_type_id) {
   std::string signature("unknown");
-  Dbg::GetSignature(ref_type_id, signature);
+  Dbg::GetSignature(ref_type_id, &signature);
   return StringPrintf("%#llx (%s)", ref_type_id, signature.c_str());
 }
 
@@ -547,7 +547,7 @@
   RefTypeId refTypeId = request.ReadRefTypeId();
 
   std::string signature;
-  JdwpError status = Dbg::GetSignature(refTypeId, signature);
+  JdwpError status = Dbg::GetSignature(refTypeId, &signature);
   if (status != ERR_NONE) {
     return status;
   }
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 6a0990e..29fc7a4 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -228,6 +228,7 @@
                               const char* name, const char* sig, bool is_static)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   Class* c = soa.Decode<Class*>(jni_class);
+  DCHECK(c != nullptr);
   if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(c, true, true)) {
     return NULL;
   }
@@ -324,14 +325,14 @@
   return soa.EncodeField(field);
 }
 
-static void PinPrimitiveArray(const ScopedObjectAccess& soa, const Array* array)
+static void PinPrimitiveArray(const ScopedObjectAccess& soa, Array* array)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   JavaVMExt* vm = soa.Vm();
   MutexLock mu(soa.Self(), vm->pins_lock);
   vm->pin_table.Add(array);
 }
 
-static void UnpinPrimitiveArray(const ScopedObjectAccess& soa, const Array* array)
+static void UnpinPrimitiveArray(const ScopedObjectAccess& soa, Array* array)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   JavaVMExt* vm = soa.Vm();
   MutexLock mu(soa.Self(), vm->pins_lock);
@@ -450,7 +451,7 @@
         class_loader_(class_loader),
         jni_on_load_lock_("JNI_OnLoad lock"),
         jni_on_load_cond_("JNI_OnLoad condition variable", jni_on_load_lock_),
-        jni_on_load_thread_id_(Thread::Current()->GetThinLockId()),
+        jni_on_load_thread_id_(Thread::Current()->GetThreadId()),
         jni_on_load_result_(kPending) {
   }
 
@@ -475,7 +476,7 @@
     {
       MutexLock mu(self, jni_on_load_lock_);
 
-      if (jni_on_load_thread_id_ == self->GetThinLockId()) {
+      if (jni_on_load_thread_id_ == self->GetThreadId()) {
         // Check this so we don't end up waiting for ourselves.  We need to return "true" so the
         // caller can continue.
         LOG(INFO) << *self << " recursive attempt to load library " << "\"" << path_ << "\"";
@@ -1999,7 +2000,7 @@
     CHECK_NON_NULL_ARGUMENT(GetStringUTFRegion, java_string);
     ScopedObjectAccess soa(env);
     String* s = soa.Decode<String*>(java_string);
-    const CharArray* chars = s->GetCharArray();
+    CharArray* chars = s->GetCharArray();
     PinPrimitiveArray(soa, chars);
     if (is_copy != NULL) {
       *is_copy = JNI_FALSE;
@@ -3081,15 +3082,6 @@
   weak_globals_add_condition_.Broadcast(self);
 }
 
-void JavaVMExt::SweepWeakGlobals(IsMarkedTester is_marked, void* arg) {
-  MutexLock mu(Thread::Current(), weak_globals_lock_);
-  for (const Object** entry : weak_globals_) {
-    if (!is_marked(*entry, arg)) {
-      *entry = kClearedJniWeakGlobal;
-    }
-  }
-}
-
 mirror::Object* JavaVMExt::DecodeWeakGlobal(Thread* self, IndirectRef ref) {
   MutexLock mu(self, weak_globals_lock_);
   while (UNLIKELY(!allow_new_weak_globals_)) {
@@ -3271,6 +3263,18 @@
   return native_method;
 }
 
+void JavaVMExt::SweepJniWeakGlobals(RootVisitor visitor, void* arg) {
+  MutexLock mu(Thread::Current(), weak_globals_lock_);
+  for (mirror::Object** entry : weak_globals_) {
+    mirror::Object* obj = *entry;
+    mirror::Object* new_obj = visitor(obj, arg);
+    if (new_obj == nullptr) {
+      new_obj = kClearedJniWeakGlobal;
+    }
+    *entry = new_obj;
+  }
+}
+
 void JavaVMExt::VisitRoots(RootVisitor* visitor, void* arg) {
   Thread* self = Thread::Current();
   {
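
The new SweepJniWeakGlobals fixes the callback contract declared below in jni_internal.h: the visitor returns the (possibly moved) address of a live object, or nullptr for a dead one, and dead slots are overwritten with the kClearedJniWeakGlobal sentinel. A self-contained toy model of that contract (all types here are stand-ins, not ART's):

  #include <unordered_set>
  #include <vector>

  struct Obj {};
  static Obj cleared_sentinel;  // Stand-in for kClearedJniWeakGlobal.

  using Visitor = Obj* (*)(Obj* obj, void* arg);

  // Mirrors the loop in SweepJniWeakGlobals: dead entries become the sentinel,
  // live entries are updated to whatever address the visitor returns.
  void Sweep(std::vector<Obj*>& weak_globals, Visitor visitor, void* arg) {
    for (Obj*& entry : weak_globals) {
      Obj* new_obj = visitor(entry, arg);
      entry = (new_obj == nullptr) ? &cleared_sentinel : new_obj;
    }
  }

  // Example visitor for a non-moving collector: live objects keep their address.
  Obj* MarkSetVisitor(Obj* obj, void* arg) {
    auto* marked = static_cast<std::unordered_set<Obj*>*>(arg);
    return (marked->count(obj) != 0) ? obj : nullptr;
  }
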
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 32d0bfc..c73ed48 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -98,7 +98,7 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DeleteWeakGlobalRef(Thread* self, jweak obj)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SweepWeakGlobals(IsMarkedTester is_marked, void* arg);
+  void SweepJniWeakGlobals(RootVisitor visitor, void* arg);
   mirror::Object* DecodeWeakGlobal(Thread* self, IndirectRef ref);
 
   Runtime* runtime;
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 79d156d..c389580 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -1012,31 +1012,50 @@
                                scalar_type, \
                                expected_class_descriptor) \
   jsize size = 4; \
+  \
   /* Allocate an array and check it has the right type and length. */ \
   scalar_type ## Array a = env_->new_fn(size); \
   EXPECT_TRUE(a != NULL); \
   EXPECT_TRUE(env_->IsInstanceOf(a, env_->FindClass(expected_class_descriptor))); \
   EXPECT_EQ(size, env_->GetArrayLength(a)); \
+  \
+  /* GetPrimitiveArrayRegion/SetPrimitiveArrayRegion */ \
   /* AIOOBE for negative start offset. */ \
   env_->get_region_fn(a, -1, 1, NULL); \
   EXPECT_EXCEPTION(aioobe_); \
   env_->set_region_fn(a, -1, 1, NULL); \
   EXPECT_EXCEPTION(aioobe_); \
+  \
   /* AIOOBE for negative length. */ \
   env_->get_region_fn(a, 0, -1, NULL); \
   EXPECT_EXCEPTION(aioobe_); \
   env_->set_region_fn(a, 0, -1, NULL); \
   EXPECT_EXCEPTION(aioobe_); \
+  \
   /* AIOOBE for buffer overrun. */ \
   env_->get_region_fn(a, size - 1, size, NULL); \
   EXPECT_EXCEPTION(aioobe_); \
   env_->set_region_fn(a, size - 1, size, NULL); \
   EXPECT_EXCEPTION(aioobe_); \
+  \
+  /* It's okay for the buffer to be NULL as long as the length is 0. */ \
+  env_->get_region_fn(a, 2, 0, NULL); \
+  /* Even if the offset is invalid... */ \
+  env_->get_region_fn(a, 123, 0, NULL); \
+  EXPECT_EXCEPTION(aioobe_); \
+  \
+  /* It's okay for the buffer to be NULL as long as the length is 0. */ \
+  env_->set_region_fn(a, 2, 0, NULL); \
+  /* Even if the offset is invalid... */ \
+  env_->set_region_fn(a, 123, 0, NULL); \
+  EXPECT_EXCEPTION(aioobe_); \
+  \
   /* Prepare a couple of buffers. */ \
   UniquePtr<scalar_type[]> src_buf(new scalar_type[size]); \
   UniquePtr<scalar_type[]> dst_buf(new scalar_type[size]); \
   for (jsize i = 0; i < size; ++i) { src_buf[i] = scalar_type(i); } \
   for (jsize i = 0; i < size; ++i) { dst_buf[i] = scalar_type(-1); } \
+  \
   /* Copy all of src_buf onto the heap. */ \
   env_->set_region_fn(a, 0, size, &src_buf[0]); \
   /* Copy back only part. */ \
@@ -1252,6 +1271,12 @@
   EXPECT_EQ('l', chars[2]);
   EXPECT_EQ('x', chars[3]);
 
+  // It's okay for the buffer to be NULL as long as the length is 0.
+  env_->GetStringRegion(s, 2, 0, NULL);
+  // Even if the offset is invalid...
+  env_->GetStringRegion(s, 123, 0, NULL);
+  EXPECT_EXCEPTION(sioobe_);
+
   env_->GetStringUTFRegion(s, -1, 0, NULL);
   EXPECT_EXCEPTION(sioobe_);
   env_->GetStringUTFRegion(s, 0, -1, NULL);
@@ -1267,6 +1292,12 @@
   EXPECT_EQ('e', bytes[1]);
   EXPECT_EQ('l', bytes[2]);
   EXPECT_EQ('x', bytes[3]);
+
+  // It's okay for the buffer to be NULL as long as the length is 0.
+  env_->GetStringUTFRegion(s, 2, 0, NULL);
+  // Even if the offset is invalid...
+  env_->GetStringUTFRegion(s, 123, 0, NULL);
+  EXPECT_EXCEPTION(sioobe_);
 }
 
 TEST_F(JniInternalTest, GetStringUTFChars_ReleaseStringUTFChars) {
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
new file mode 100644
index 0000000..30bf9bb
--- /dev/null
+++ b/runtime/lock_word-inl.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_LOCK_WORD_INL_H_
+#define ART_RUNTIME_LOCK_WORD_INL_H_
+
+#include "lock_word.h"
+
+namespace art {
+
+inline uint32_t LockWord::ThinLockOwner() const {
+  DCHECK_EQ(GetState(), kThinLocked);
+  return (value_ >> kThinLockOwnerShift) & kThinLockOwnerMask;
+}
+
+inline uint32_t LockWord::ThinLockCount() const {
+  DCHECK_EQ(GetState(), kThinLocked);
+  return (value_ >> kThinLockCountShift) & kThinLockCountMask;
+}
+
+inline Monitor* LockWord::FatLockMonitor() const {
+  DCHECK_EQ(GetState(), kFatLocked);
+  return reinterpret_cast<Monitor*>(value_ << 1);
+}
+
+inline LockWord::LockWord() : value_(0) {
+  DCHECK_EQ(GetState(), kUnlocked);
+}
+
+inline LockWord::LockWord(Monitor* mon)
+    : value_((reinterpret_cast<uint32_t>(mon) >> 1) | (kStateFat << kStateShift)) {
+  DCHECK_EQ(FatLockMonitor(), mon);
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_LOCK_WORD_INL_H_
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
new file mode 100644
index 0000000..cd4bfbb
--- /dev/null
+++ b/runtime/lock_word.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_LOCK_WORD_H_
+#define ART_RUNTIME_LOCK_WORD_H_
+
+#include <iosfwd>
+#include <stdint.h>
+
+#include "base/logging.h"
+
+namespace art {
+namespace mirror {
+  class Object;
+}  // namespace mirror
+
+class Monitor;
+
+/* The lock value itself as stored in mirror::Object::monitor_.  The MSB of the lock encodes its
+ * state.  When cleared, the lock is in the "thin" state and its bits are formatted as follows:
+ *
+ *  |3|322222222221111|1111110000000000|
+ *  |1|098765432109876|5432109876543210|
+ *  |0| lock count    | thread id      |
+ *
+ * When set, the lock is in the "fat" state and its bits are formatted as follows:
+ *
+ *  |3|3222222222211111111110000000000|
+ *  |1|0987654321098765432109876543210|
+ *  |1| Monitor* >> 1                 |
+ */
+class LockWord {
+ public:
+  enum {
+    // Number of bits to encode the state, currently just fat or thin/unlocked.
+    kStateSize = 1,
+    // Number of bits to encode the thin lock owner.
+    kThinLockOwnerSize = 16,
+    // Remaining bits are the recursive lock count.
+    kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize,
+
+    // Thin lock bits. Owner in lowest bits.
+    kThinLockOwnerShift = 0,
+    kThinLockOwnerMask = (1 << kThinLockOwnerSize) - 1,
+    // Count in higher bits.
+    kThinLockCountShift = kThinLockOwnerSize + kThinLockOwnerShift,
+    kThinLockCountMask = (1 << kThinLockCountSize) - 1,
+    kThinLockMaxCount = kThinLockCountMask,
+
+    // State in the highest bits.
+    kStateShift = kThinLockCountSize + kThinLockCountShift,
+    kStateMask = (1 << kStateSize) - 1,
+    kStateThinOrUnlocked = 0,
+    kStateFat = 1,
+  };
+
+  static LockWord FromThinLockId(uint32_t thread_id, uint32_t count) {
+    CHECK_LE(thread_id, static_cast<uint32_t>(kThinLockOwnerMask));
+    return LockWord((thread_id << kThinLockOwnerShift) | (count << kThinLockCountShift));
+  }
+
+  enum LockState {
+    kUnlocked,    // No lock owners.
+    kThinLocked,  // Single uncontended owner.
+    kFatLocked    // See associated monitor.
+  };
+
+  LockState GetState() const {
+    if (value_ == 0) {
+      return kUnlocked;
+    } else if (((value_ >> kStateShift) & kStateMask) == kStateThinOrUnlocked) {
+      return kThinLocked;
+    } else {
+      return kFatLocked;
+    }
+  }
+
+  // Return the owner thin lock thread id.
+  uint32_t ThinLockOwner() const;
+
+  // Return the recursion count: how many times the owner has acquired this thin lock.
+  uint32_t ThinLockCount() const;
+
+  // Return the Monitor encoded in a fat lock.
+  Monitor* FatLockMonitor() const;
+
+  // Default constructor with no lock ownership.
+  LockWord();
+
+  // Constructs a lock word for inflation, referencing the given Monitor.
+  explicit LockWord(Monitor* mon);
+
+  bool operator==(const LockWord& rhs) {
+    return GetValue() == rhs.GetValue();
+  }
+
+ private:
+  explicit LockWord(uint32_t val) : value_(val) {}
+
+  uint32_t GetValue() const {
+    return value_;
+  }
+
+  // Only Object should be converting LockWords to/from uints.
+  friend class mirror::Object;
+
+  // The encoded value holding all the state.
+  uint32_t value_;
+};
+std::ostream& operator<<(std::ostream& os, const LockWord::LockState& code);
+
+}  // namespace art
+
+
+#endif  // ART_RUNTIME_LOCK_WORD_H_
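
A worked example of the layout documented above, using the enum's field sizes (16-bit owner, 15-bit count, 1 state bit); plain bit arithmetic rather than ART types:

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t thread_id = 5;
    const uint32_t count = 2;
    // FromThinLockId: owner in the low 16 bits, count in the next 15.
    uint32_t word = (thread_id << 0) | (count << 16);
    assert((word >> 31) == 0);                 // MSB clear: thin or unlocked state.
    assert((word & 0xffff) == thread_id);      // ThinLockOwner().
    assert(((word >> 16) & 0x7fff) == count);  // ThinLockCount(), 15-bit mask.
    // A fat word stores Monitor* >> 1 in the low 31 bits with the MSB set, so
    // FatLockMonitor() recovers the (2-byte aligned) pointer with value_ << 1.
    return 0;
  }
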
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index eb73c7d..c60e714 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -20,6 +20,9 @@
 #include "array.h"
 
 #include "class.h"
+#include "gc/heap-inl.h"
+#include "thread.h"
+#include "utils.h"
 
 namespace art {
 namespace mirror {
@@ -33,6 +36,68 @@
   return header_size + data_size;
 }
 
+static inline size_t ComputeArraySize(Thread* self, Class* array_class, int32_t component_count,
+                                      size_t component_size)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  DCHECK(array_class != NULL);
+  DCHECK_GE(component_count, 0);
+  DCHECK(array_class->IsArrayClass());
+
+  size_t header_size = sizeof(Object) + (component_size == sizeof(int64_t) ? 8 : 4);
+  size_t data_size = component_count * component_size;
+  size_t size = header_size + data_size;
+
+  // Check for overflow and throw OutOfMemoryError if this was an unreasonable request.
+  size_t component_shift = sizeof(size_t) * 8 - 1 - CLZ(component_size);
+  if (UNLIKELY(data_size >> component_shift != size_t(component_count) || size < data_size)) {
+    self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow",
+                                             PrettyDescriptor(array_class).c_str(),
+                                             component_count).c_str());
+    return 0;  // failure
+  }
+  return size;
+}
+
+static inline Array* SetArrayLength(Array* array, size_t length) {
+  if (LIKELY(array != NULL)) {
+    DCHECK(array->IsArrayInstance());
+    array->SetLength(length);
+  }
+  return array;
+}
+
+inline Array* Array::AllocInstrumented(Thread* self, Class* array_class, int32_t component_count,
+                                       size_t component_size) {
+  size_t size = ComputeArraySize(self, array_class, component_count, component_size);
+  if (UNLIKELY(size == 0)) {
+    return NULL;
+  }
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  Array* array = down_cast<Array*>(heap->AllocObjectInstrumented(self, array_class, size));
+  return SetArrayLength(array, component_count);
+}
+
+inline Array* Array::AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count,
+                                         size_t component_size) {
+  size_t size = ComputeArraySize(self, array_class, component_count, component_size);
+  if (UNLIKELY(size == 0)) {
+    return NULL;
+  }
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  Array* array = down_cast<Array*>(heap->AllocObjectUninstrumented(self, array_class, size));
+  return SetArrayLength(array, component_count);
+}
+
+inline Array* Array::AllocInstrumented(Thread* self, Class* array_class, int32_t component_count) {
+  DCHECK(array_class->IsArrayClass());
+  return AllocInstrumented(self, array_class, component_count, array_class->GetComponentSize());
+}
+
+inline Array* Array::AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count) {
+  DCHECK(array_class->IsArrayClass());
+  return AllocUninstrumented(self, array_class, component_count, array_class->GetComponentSize());
+}
+
 }  // namespace mirror
 }  // namespace art
 
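The overflow check in ComputeArraySize relies on the component size being a power of two: dividing data_size back by the component size (via the shift) must reproduce component_count, or the multiplication wrapped; the additional size < data_size clause guards the header addition. A standalone illustration in explicit 32-bit arithmetic:

  #include <cassert>
  #include <cstdint>

  bool WouldOverflow(uint32_t component_count, uint32_t component_size) {
    uint32_t data_size = component_count * component_size;  // May wrap in 32 bits.
    // 31 - clz(size) is log2(size) for power-of-two sizes, as with CLZ above.
    uint32_t component_shift = 31 - __builtin_clz(component_size);
    return (data_size >> component_shift) != component_count;
  }

  int main() {
    assert(!WouldOverflow(4u, 8u));          // 32 bytes; no wraparound.
    assert(WouldOverflow(0x20000000u, 8u));  // 2^29 * 8 == 2^32 wraps to 0.
    return 0;
  }
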
diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc
index 88cd309..020085d 100644
--- a/runtime/mirror/array.cc
+++ b/runtime/mirror/array.cc
@@ -32,39 +32,6 @@
 namespace art {
 namespace mirror {
 
-Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
-                    size_t component_size) {
-  DCHECK(array_class != NULL);
-  DCHECK_GE(component_count, 0);
-  DCHECK(array_class->IsArrayClass());
-
-  size_t header_size = sizeof(Object) + (component_size == sizeof(int64_t) ? 8 : 4);
-  size_t data_size = component_count * component_size;
-  size_t size = header_size + data_size;
-
-  // Check for overflow and throw OutOfMemoryError if this was an unreasonable request.
-  size_t component_shift = sizeof(size_t) * 8 - 1 - CLZ(component_size);
-  if (UNLIKELY(data_size >> component_shift != size_t(component_count) || size < data_size)) {
-    self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow",
-                                             PrettyDescriptor(array_class).c_str(),
-                                             component_count).c_str());
-    return NULL;
-  }
-
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  Array* array = down_cast<Array*>(heap->AllocObject(self, array_class, size));
-  if (array != NULL) {
-    DCHECK(array->IsArrayInstance());
-    array->SetLength(component_count);
-  }
-  return array;
-}
-
-Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count) {
-  DCHECK(array_class->IsArrayClass());
-  return Alloc(self, array_class, component_count, array_class->GetComponentSize());
-}
-
 // Create a multi-dimensional array of Objects or primitive types.
 //
 // We have to generate the names for X[], X[][], X[][][], and so on.  The
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index db6132d..570dcaa 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -27,10 +27,24 @@
   // A convenience for code that doesn't know the component size,
   // and doesn't want to have to work it out itself.
   static Array* Alloc(Thread* self, Class* array_class, int32_t component_count)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return AllocInstrumented(self, array_class, component_count);
+  }
+  static Array* AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static Array* AllocInstrumented(Thread* self, Class* array_class, int32_t component_count)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
                       size_t component_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return AllocInstrumented(self, array_class, component_count, component_size);
+  }
+  static Array* AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count,
+                                    size_t component_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static Array* AllocInstrumented(Thread* self, Class* array_class, int32_t component_count,
+                                  size_t component_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Array* CreateMultiArray(Thread* self, Class* element_class, IntArray* dimensions)
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 1e11387..88cffb7 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -23,6 +23,7 @@
 #include "art_method.h"
 #include "class_loader.h"
 #include "dex_cache.h"
+#include "gc/heap-inl.h"
 #include "iftable.h"
 #include "object_array-inl.h"
 #include "runtime.h"
@@ -342,6 +343,24 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, name_), name, false);
 }
 
+inline void Class::CheckObjectAlloc() {
+  DCHECK(!IsArrayClass()) << PrettyClass(this);
+  DCHECK(IsInstantiable()) << PrettyClass(this);
+  // TODO: decide whether we want this check. It currently fails during bootstrap.
+  // DCHECK(!Runtime::Current()->IsStarted() || IsInitializing()) << PrettyClass(this);
+  DCHECK_GE(this->object_size_, sizeof(Object));
+}
+
+inline Object* Class::AllocObjectInstrumented(Thread* self) {
+  CheckObjectAlloc();
+  return Runtime::Current()->GetHeap()->AllocObjectInstrumented(self, this, this->object_size_);
+}
+
+inline Object* Class::AllocObjectUninstrumented(Thread* self) {
+  CheckObjectAlloc();
+  return Runtime::Current()->GetHeap()->AllocObjectUninstrumented(self, this, this->object_size_);
+}
+
 }  // namespace mirror
 }  // namespace art
 
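The AllocObject split above leaves existing callers on the instrumented path while adding a cheaper uninstrumented entry point; how the runtime chooses between them is outside this hunk, but the shape is presumably a dispatch like the following sketch (the flag and helpers are hypothetical):

  #include <atomic>

  struct Obj {};
  static Obj slot_fast, slot_hooked;

  Obj* AllocFast() { return &slot_fast; }         // Stand-in for AllocObjectUninstrumented.
  Obj* AllocWithHooks() { return &slot_hooked; }  // Stand-in for AllocObjectInstrumented.

  std::atomic<bool> g_alloc_instrumented{true};   // Hypothetical runtime flag.

  Obj* AllocObject() {
    // When no allocation listeners are installed, route callers to the cheap path.
    return g_alloc_instrumented.load(std::memory_order_relaxed) ? AllocWithHooks()
                                                                : AllocFast();
  }
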
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index add7e1b..2b0b1e1 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -60,7 +60,7 @@
     }
     if (new_status >= kStatusResolved || old_status >= kStatusResolved) {
       // When classes are being resolved the resolution code should hold the lock.
-      CHECK_EQ(GetThinLockId(), self->GetThinLockId())
+      CHECK_EQ(GetLockOwnerThreadId(), self->GetThreadId())
             << "Attempt to change status of class while not holding its lock: "
             << PrettyClass(this) << " " << old_status << " -> " << new_status;
     }
@@ -118,15 +118,6 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_), new_dex_cache, false);
 }
 
-Object* Class::AllocObject(Thread* self) {
-  DCHECK(!IsArrayClass()) << PrettyClass(this);
-  DCHECK(IsInstantiable()) << PrettyClass(this);
-  // TODO: decide whether we want this check. It currently fails during bootstrap.
-  // DCHECK(!Runtime::Current()->IsStarted() || IsInitializing()) << PrettyClass(this);
-  DCHECK_GE(this->object_size_, sizeof(Object));
-  return Runtime::Current()->GetHeap()->AllocObject(self, this, this->object_size_);
-}
-
 void Class::SetClassSize(size_t new_class_size) {
   if (kIsDebugBuild && (new_class_size < GetClassSize())) {
     DumpClass(LOG(ERROR), kDumpClassFullDetail);
@@ -144,7 +135,7 @@
   if (name != NULL) {
     return name;
   }
-  std::string descriptor(ClassHelper(this).GetDescriptor());
+  std::string descriptor(ClassHelper(this).GetDescriptorAsStringPiece().as_string());
   if ((descriptor[0] != 'L') && (descriptor[0] != '[')) {
     // The descriptor indicates that this is the class for
     // a primitive type; special-case the return value.
@@ -303,8 +294,8 @@
     return true;
   }
   // Compare the package part of the descriptor string.
-  return IsInSamePackage(ClassHelper(klass1).GetDescriptor(),
-                         ClassHelper(klass2).GetDescriptor());
+  return IsInSamePackage(ClassHelper(klass1).GetDescriptorAsStringPiece(),
+                         ClassHelper(klass2).GetDescriptorAsStringPiece());
 }
 
 bool Class::IsClassClass() const {
@@ -334,7 +325,7 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, class_loader_), new_class_loader, false);
 }
 
-ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const StringPiece& signature) const {
+ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const Signature& signature) const {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(name, signature);
   if (method != NULL) {
@@ -370,13 +361,24 @@
   return NULL;
 }
 
-
 ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature) const {
   MethodHelper mh;
   for (size_t i = 0; i < NumDirectMethods(); ++i) {
     ArtMethod* method = GetDirectMethod(i);
     mh.ChangeMethod(method);
-    if (name == mh.GetName() && signature == mh.GetSignature()) {
+    if (name == mh.GetNameAsStringPiece() && mh.GetSignature() == signature) {
+      return method;
+    }
+  }
+  return NULL;
+}
+
+ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature) const {
+  MethodHelper mh;
+  for (size_t i = 0; i < NumDirectMethods(); ++i) {
+    ArtMethod* method = GetDirectMethod(i);
+    mh.ChangeMethod(method);
+    if (name == mh.GetNameAsStringPiece() && signature == mh.GetSignature()) {
       return method;
     }
   }
@@ -405,6 +407,16 @@
   return NULL;
 }
 
+ArtMethod* Class::FindDirectMethod(const StringPiece& name, const Signature& signature) const {
+  for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+    ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature);
+    if (method != NULL) {
+      return method;
+    }
+  }
+  return NULL;
+}
+
 ArtMethod* Class::FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const {
   for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(dex_cache, dex_method_idx);
@@ -415,13 +427,25 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name,
-                                         const StringPiece& signature) const {
+ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) const {
   MethodHelper mh;
   for (size_t i = 0; i < NumVirtualMethods(); ++i) {
     ArtMethod* method = GetVirtualMethod(i);
     mh.ChangeMethod(method);
-    if (name == mh.GetName() && signature == mh.GetSignature()) {
+    if (name == mh.GetNameAsStringPiece() && mh.GetSignature() == signature) {
+      return method;
+    }
+  }
+  return NULL;
+}
+
+ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name,
+                                            const Signature& signature) const {
+  MethodHelper mh;
+  for (size_t i = 0; i < NumVirtualMethods(); ++i) {
+    ArtMethod* method = GetVirtualMethod(i);
+    mh.ChangeMethod(method);
+    if (name == mh.GetNameAsStringPiece() && signature == mh.GetSignature()) {
       return method;
     }
   }
@@ -450,6 +474,16 @@
   return NULL;
 }
 
+ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const Signature& signature) const {
+  for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+    ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature);
+    if (method != NULL) {
+      return method;
+    }
+  }
+  return NULL;
+}
+
 ArtMethod* Class::FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const {
   for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(dex_cache, dex_method_idx);
@@ -460,6 +494,22 @@
   return NULL;
 }
 
+ArtMethod* Class::FindClassInitializer() const {
+  for (size_t i = 0; i < NumDirectMethods(); ++i) {
+    ArtMethod* method = GetDirectMethod(i);
+    if (method->IsConstructor() && method->IsStatic()) {
+      if (kIsDebugBuild) {
+        MethodHelper mh(method);
+        CHECK(mh.IsClassInitializer());
+        CHECK_STREQ(mh.GetName(), "<clinit>");
+        CHECK_STREQ(mh.GetSignature().ToString().c_str(), "()V");
+      }
+      return method;
+    }
+  }
+  return NULL;
+}
+
 ArtField* Class::FindDeclaredInstanceField(const StringPiece& name, const StringPiece& type) {
   // Is the field in this class?
   // Interfaces are not relevant because they can't contain instance fields.
@@ -467,7 +517,7 @@
   for (size_t i = 0; i < NumInstanceFields(); ++i) {
     ArtField* f = GetInstanceField(i);
     fh.ChangeField(f);
-    if (name == fh.GetName() && type == fh.GetTypeDescriptor()) {
+    if (name == fh.GetNameAsStringPiece() && type == fh.GetTypeDescriptorAsStringPiece()) {
       return f;
     }
   }
@@ -516,7 +566,7 @@
   for (size_t i = 0; i < NumStaticFields(); ++i) {
     ArtField* f = GetStaticField(i);
     fh.ChangeField(f);
-    if (name == fh.GetName() && type == fh.GetTypeDescriptor()) {
+    if (name == fh.GetNameAsStringPiece() && type == fh.GetTypeDescriptorAsStringPiece()) {
       return f;
     }
   }
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index d97b603..dbc6f57 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -59,6 +59,7 @@
 
 struct ClassClassOffsets;
 struct ClassOffsets;
+class Signature;
 class StringPiece;
 
 namespace mirror {
@@ -246,7 +247,7 @@
     } else {
       Class* component = GetComponentType();
       if (component->IsPrimitive()) {
-        return false;
+        return true;
       } else {
         return component->CannotBeAssignedFromOtherTypes();
       }
@@ -345,14 +346,18 @@
 
   bool IsArtMethodClass() const;
 
+  static MemberOffset ComponentTypeOffset() {
+    return OFFSET_OF_OBJECT_MEMBER(Class, component_type_);
+  }
+
   Class* GetComponentType() const {
-    return GetFieldObject<Class*>(OFFSET_OF_OBJECT_MEMBER(Class, component_type_), false);
+    return GetFieldObject<Class*>(ComponentTypeOffset(), false);
   }
 
   void SetComponentType(Class* new_component_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(GetComponentType() == NULL);
     DCHECK(new_component_type != NULL);
-    SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, component_type_), new_component_type, false);
+    SetFieldObject(ComponentTypeOffset(), new_component_type, false);
   }
 
   size_t GetComponentSize() const {
@@ -371,7 +376,12 @@
   }
 
   // Creates a raw object instance but does not invoke the default constructor.
-  Object* AllocObject(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Object* AllocObject(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return AllocObjectInstrumented(self);
+  }
+
+  Object* AllocObjectUninstrumented(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Object* AllocObjectInstrumented(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsVariableSize() const {
     // Classes and arrays vary in size, and so the object_size_ field cannot
@@ -560,39 +570,53 @@
   ArtMethod* FindVirtualMethodForInterface(ArtMethod* method) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE;
 
-  ArtMethod* FindInterfaceMethod(const StringPiece& name, const StringPiece& descriptor) const
+  ArtMethod* FindVirtualMethodForVirtualOrInterface(ArtMethod* method) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  ArtMethod* FindInterfaceMethod(const StringPiece& name, const Signature& signature) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ArtMethod* FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindVirtualMethodForVirtualOrInterface(ArtMethod* method) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  ArtMethod* FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  ArtMethod* FindVirtualMethod(const StringPiece& name, const StringPiece& descriptor) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  ArtMethod* FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   ArtMethod* FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ArtMethod* FindDirectMethod(const StringPiece& name, const StringPiece& signature) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  ArtMethod* FindDirectMethod(const StringPiece& name, const Signature& signature) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   ArtMethod* FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const Signature& signature) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  ArtMethod* FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  ArtMethod* FindVirtualMethod(const StringPiece& name, const StringPiece& signature) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  ArtMethod* FindVirtualMethod(const StringPiece& name, const Signature& signature) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  ArtMethod* FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  ArtMethod* FindClassInitializer() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   int32_t GetIfTableCount() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   IfTable* GetIfTable() const;
@@ -764,6 +788,8 @@
   bool IsAssignableFromArray(const Class* klass) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  void CheckObjectAlloc() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // defining class loader, or NULL for the "bootstrap" system loader
   ClassLoader* class_loader_;
 
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 5ed3db3..e659108 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -24,6 +24,7 @@
 #include "atomic.h"
 #include "array-inl.h"
 #include "class.h"
+#include "lock_word-inl.h"
 #include "monitor.h"
 #include "runtime.h"
 #include "throwable.h"
@@ -43,8 +44,21 @@
   SetFieldPtr(OFFSET_OF_OBJECT_MEMBER(Object, klass_), new_klass, false, false);
 }
 
-inline uint32_t Object::GetThinLockId() {
-  return Monitor::GetThinLockId(monitor_);
+inline LockWord Object::GetLockWord() {
+  return LockWord(GetField32(OFFSET_OF_OBJECT_MEMBER(Object, monitor_), true));
+}
+
+inline void Object::SetLockWord(LockWord new_val) {
+  SetField32(OFFSET_OF_OBJECT_MEMBER(Object, monitor_), new_val.GetValue(), true);
+}
+
+inline bool Object::CasLockWord(LockWord old_val, LockWord new_val) {
+  return CasField32(OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(),
+                    new_val.GetValue());
+}
+
+inline uint32_t Object::GetLockOwnerThreadId() {
+  return Monitor::GetLockOwnerThreadId(this);
 }
 
 inline void Object::MonitorEnter(Thread* self) {
@@ -238,6 +252,13 @@
   return result;
 }
 
+inline bool Object::CasField32(MemberOffset field_offset, uint32_t old_value, uint32_t new_value) {
+  VerifyObject(this);
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  int32_t* addr = reinterpret_cast<int32_t*>(raw_addr);
+  return android_atomic_release_cas(old_value, new_value, addr) == 0;
+}
+
 inline uint64_t Object::GetField64(MemberOffset field_offset, bool is_volatile) const {
   VerifyObject(this);
   const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
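
CasField32 gives Object a compare-and-swap primitive over a 32-bit field, which CasLockWord builds on. The usual usage is a read-modify-CAS retry loop; a standalone sketch with std::atomic standing in for android_atomic_release_cas:

  #include <atomic>
  #include <cstdint>

  // Same contract as CasField32: true if old_value was still current and
  // new_value was published with release semantics.
  bool CasWord(std::atomic<uint32_t>& word, uint32_t old_value, uint32_t new_value) {
    return word.compare_exchange_strong(old_value, new_value, std::memory_order_release);
  }

  // Typical caller: recompute and retry until the CAS wins.
  void BumpCountField(std::atomic<uint32_t>& lock_word) {
    for (;;) {
      uint32_t old_val = lock_word.load(std::memory_order_relaxed);
      uint32_t new_val = old_val + (1u << 16);  // e.g. a thin-lock recursion count.
      if (CasWord(lock_word, old_val, new_val)) {
        return;  // Otherwise another thread raced us; reread and retry.
      }
    }
  }
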
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index e105525..e3f5c10 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -26,6 +26,7 @@
 namespace art {
 
 class ImageWriter;
+class LockWord;
 struct ObjectOffsets;
 class Thread;
 
@@ -95,14 +96,10 @@
     return OFFSET_OF_OBJECT_MEMBER(Object, monitor_);
   }
 
-  volatile int32_t* GetRawLockWordAddress() {
-    byte* raw_addr = reinterpret_cast<byte*>(this) +
-        OFFSET_OF_OBJECT_MEMBER(Object, monitor_).Int32Value();
-    int32_t* word_addr = reinterpret_cast<int32_t*>(raw_addr);
-    return const_cast<volatile int32_t*>(word_addr);
-  }
-
-  uint32_t GetThinLockId();
+  LockWord GetLockWord();
+  void SetLockWord(LockWord new_val);
+  bool CasLockWord(LockWord old_val, LockWord new_val);
+  uint32_t GetLockOwnerThreadId();
 
   void MonitorEnter(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCK_FUNCTION(monitor_lock_);
@@ -189,6 +186,11 @@
     }
   }
 
+  Object** GetFieldObjectAddr(MemberOffset field_offset) ALWAYS_INLINE {
+    VerifyObject(this);
+    return reinterpret_cast<Object**>(reinterpret_cast<byte*>(this) + field_offset.Int32Value());
+  }
+
   uint32_t GetField32(MemberOffset field_offset, bool is_volatile) const {
     VerifyObject(this);
     const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
@@ -221,6 +223,8 @@
     }
   }
 
+  bool CasField32(MemberOffset field_offset, uint32_t old_value, uint32_t new_value);
+
   uint64_t GetField64(MemberOffset field_offset, bool is_volatile) const;
 
   void SetField64(MemberOffset field_offset, uint64_t new_value, bool is_volatile);
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index b8765af..4c5f90c 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -71,12 +71,20 @@
 
 // Keep the assembly code in sync
 TEST_F(ObjectTest, AsmConstants) {
-  ASSERT_EQ(STRING_VALUE_OFFSET, String::ValueOffset().Int32Value());
-  ASSERT_EQ(STRING_COUNT_OFFSET, String::CountOffset().Int32Value());
-  ASSERT_EQ(STRING_OFFSET_OFFSET, String::OffsetOffset().Int32Value());
-  ASSERT_EQ(STRING_DATA_OFFSET, Array::DataOffset(sizeof(uint16_t)).Int32Value());
+  EXPECT_EQ(CLASS_OFFSET, Object::ClassOffset().Int32Value());
+  EXPECT_EQ(LOCK_WORD_OFFSET, Object::MonitorOffset().Int32Value());
 
-  ASSERT_EQ(METHOD_CODE_OFFSET, ArtMethod::EntryPointFromCompiledCodeOffset().Int32Value());
+  EXPECT_EQ(CLASS_COMPONENT_TYPE_OFFSET, Class::ComponentTypeOffset().Int32Value());
+
+  EXPECT_EQ(ARRAY_LENGTH_OFFSET, Array::LengthOffset().Int32Value());
+  EXPECT_EQ(OBJECT_ARRAY_DATA_OFFSET, Array::DataOffset(sizeof(Object*)).Int32Value());
+
+  EXPECT_EQ(STRING_VALUE_OFFSET, String::ValueOffset().Int32Value());
+  EXPECT_EQ(STRING_COUNT_OFFSET, String::CountOffset().Int32Value());
+  EXPECT_EQ(STRING_OFFSET_OFFSET, String::OffsetOffset().Int32Value());
+  EXPECT_EQ(STRING_DATA_OFFSET, Array::DataOffset(sizeof(uint16_t)).Int32Value());
+
+  EXPECT_EQ(METHOD_CODE_OFFSET, ArtMethod::EntryPointFromCompiledCodeOffset().Int32Value());
 }
 
 TEST_F(ObjectTest, IsInSamePackage) {
@@ -262,7 +270,7 @@
 
   Class* klass =
       class_linker_->FindClass("LStaticsFromCode;", soa.Decode<ClassLoader*>(class_loader));
-  ArtMethod* clinit = klass->FindDirectMethod("<clinit>", "()V");
+  ArtMethod* clinit = klass->FindClassInitializer();
   const DexFile::StringId* klass_string_id = dex_file->FindStringId("LStaticsFromCode;");
   ASSERT_TRUE(klass_string_id != NULL);
   const DexFile::TypeId* klass_type_id = dex_file->FindTypeId(
diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc
index a505ed0..9d76c6b 100644
--- a/runtime/mirror/stack_trace_element.cc
+++ b/runtime/mirror/stack_trace_element.cc
@@ -17,6 +17,7 @@
 #include "stack_trace_element.h"
 
 #include "class.h"
+#include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "object-inl.h"
 #include "string.h"
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index f8a0e53..9c93f17 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -17,13 +17,14 @@
 #include "string.h"
 
 #include "array.h"
+#include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "intern_table.h"
 #include "object-inl.h"
 #include "runtime.h"
 #include "sirt_ref.h"
 #include "thread.h"
-#include "utf.h"
+#include "utf-inl.h"
 
 namespace art {
 namespace mirror {
@@ -32,6 +33,10 @@
   return GetFieldObject<const CharArray*>(ValueOffset(), false);
 }
 
+CharArray* String::GetCharArray() {
+  return GetFieldObject<CharArray*>(ValueOffset(), false);
+}
+
 void String::ComputeHashCode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   SetHashCode(ComputeUtf16Hash(GetCharArray(), GetOffset(), GetLength()));
 }
@@ -285,4 +290,3 @@
 
 }  // namespace mirror
 }  // namespace art
-
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 81fe42f..1879f04 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -44,6 +44,7 @@
   }
 
   const CharArray* GetCharArray() const;
+  CharArray* GetCharArray();
 
   int32_t GetOffset() const {
     int32_t result = GetField32(OffsetOffset(), false);
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 088d1f7..a5605ff 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -23,6 +23,7 @@
 #include "class_linker.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
+#include "lock_word-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -37,36 +38,20 @@
 namespace art {
 
 /*
- * Every Object has a monitor associated with it, but not every Object is
- * actually locked.  Even the ones that are locked do not need a
- * full-fledged monitor until a) there is actual contention or b) wait()
- * is called on the Object.
+ * Every Object has a monitor associated with it, but not every Object is actually locked.  Even
+ * the ones that are locked do not need a full-fledged monitor until a) there is actual contention
+ * or b) wait() is called on the Object.
  *
- * For Android, we have implemented a scheme similar to the one described
- * in Bacon et al.'s "Thin locks: featherweight synchronization for Java"
- * (ACM 1998).  Things are even easier for us, though, because we have
- * a full 32 bits to work with.
+ * For Android, we have implemented a scheme similar to the one described in Bacon et al.'s
+ * "Thin locks: featherweight synchronization for Java" (ACM 1998).  Things are even easier for us,
+ * though, because we have a full 32 bits to work with.
  *
- * The two states of an Object's lock are referred to as "thin" and
- * "fat".  A lock may transition from the "thin" state to the "fat"
- * state and this transition is referred to as inflation.  Once a lock
- * has been inflated it remains in the "fat" state indefinitely.
+ * The two states of an Object's lock are referred to as "thin" and "fat".  A lock may transition
+ * from the "thin" state to the "fat" state and this transition is referred to as inflation. Once
+ * a lock has been inflated it remains in the "fat" state indefinitely.
  *
- * The lock value itself is stored in Object.lock.  The LSB of the
- * lock encodes its state.  When cleared, the lock is in the "thin"
- * state and its bits are formatted as follows:
- *
- *    [31 ---- 19] [18 ---- 3] [2 ---- 1] [0]
- *     lock count   thread id  hash state  0
- *
- * When set, the lock is in the "fat" state and its bits are formatted
- * as follows:
- *
- *    [31 ---- 3] [2 ---- 1] [0]
- *      pointer   hash state  1
- *
- * For an in-depth description of the mechanics of thin-vs-fat locking,
- * read the paper referred to above.
+ * The lock value itself is stored in mirror::Object::monitor_ and the representation is described
+ * in the LockWord value type.
  *
  * Monitors provide:
  *  - mutually exclusive access to resources
@@ -74,32 +59,11 @@
  *
  * In effect, they fill the role of both mutexes and condition variables.
  *
- * Only one thread can own the monitor at any time.  There may be several
- * threads waiting on it (the wait call unlocks it).  One or more waiting
- * threads may be getting interrupted or notified at any given time.
- *
- * TODO: the various members of monitor are not SMP-safe.
+ * Only one thread can own the monitor at any time.  There may be several threads waiting on it
+ * (the wait call unlocks it).  One or more waiting threads may be getting interrupted or notified
+ * at any given time.
  */
 
-// The shape is the bottom bit; either LW_SHAPE_THIN or LW_SHAPE_FAT.
-#define LW_SHAPE_MASK 0x1
-#define LW_SHAPE(x) static_cast<int>((x) & LW_SHAPE_MASK)
-
-/*
- * Monitor accessor.  Extracts a monitor structure pointer from a fat
- * lock.  Performs no error checking.
- */
-#define LW_MONITOR(x) \
-  (reinterpret_cast<Monitor*>((x) & ~((LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT) | LW_SHAPE_MASK)))
-
-/*
- * Lock recursion count field.  Contains a count of the number of times
- * a lock has been recursively acquired.
- */
-#define LW_LOCK_COUNT_MASK 0x1fff
-#define LW_LOCK_COUNT_SHIFT 19
-#define LW_LOCK_COUNT(x) (((x) >> LW_LOCK_COUNT_SHIFT) & LW_LOCK_COUNT_MASK)
-
 bool (*Monitor::is_sensitive_thread_hook_)() = NULL;
 uint32_t Monitor::lock_profiling_threshold_ = 0;
 
@@ -117,29 +81,43 @@
 
 Monitor::Monitor(Thread* owner, mirror::Object* obj)
     : monitor_lock_("a monitor lock", kMonitorLock),
+      monitor_contenders_("monitor contenders", monitor_lock_),
       owner_(owner),
       lock_count_(0),
       obj_(obj),
       wait_set_(NULL),
       locking_method_(NULL),
       locking_dex_pc_(0) {
-  monitor_lock_.Lock(owner);
+  // We should only inflate a lock if the owner is ourselves or suspended. This avoids a race
+  // with the owner unlocking the thin-lock.
+  CHECK(owner == Thread::Current() || owner->IsSuspended());
+}
+
+bool Monitor::Install(Thread* self) {
+  MutexLock mu(self, monitor_lock_);  // Uncontended mutex acquisition as monitor isn't yet public.
+  CHECK(owner_ == self || owner_->IsSuspended());
   // Propagate the lock state.
-  uint32_t thin = *obj->GetRawLockWordAddress();
-  lock_count_ = LW_LOCK_COUNT(thin);
-  thin &= LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT;
-  thin |= reinterpret_cast<uint32_t>(this) | LW_SHAPE_FAT;
-  // Publish the updated lock word.
-  android_atomic_release_store(thin, obj->GetRawLockWordAddress());
-  // Lock profiling.
-  if (lock_profiling_threshold_ != 0) {
-    locking_method_ = owner->GetCurrentMethod(&locking_dex_pc_);
+  LockWord thin(obj_->GetLockWord());
+  if (thin.GetState() != LockWord::kThinLocked) {
+    // The owner_ is suspended but another thread beat us to install a monitor.
+    CHECK_EQ(thin.GetState(), LockWord::kFatLocked);
+    return false;
   }
+  CHECK_EQ(owner_->GetThreadId(), thin.ThinLockOwner());
+  lock_count_ = thin.ThinLockCount();
+  LockWord fat(this);
+  // Publish the updated lock word, which may race with other threads.
+  bool success = obj_->CasLockWord(thin, fat);
+  // Lock profiling.
+  if (success && lock_profiling_threshold_ != 0) {
+    locking_method_ = owner_->GetCurrentMethod(&locking_dex_pc_);
+  }
+  return success;
 }
 
 Monitor::~Monitor() {
-  DCHECK(obj_ != NULL);
-  DCHECK_EQ(LW_SHAPE(*obj_->GetRawLockWordAddress()), LW_SHAPE_FAT);
+  CHECK(obj_ != NULL);
+  CHECK_EQ(obj_->GetLockWord().GetState(), LockWord::kFatLocked);
 }
 
 /*
@@ -190,60 +168,56 @@
   }
 }
 
-mirror::Object* Monitor::GetObject() {
-  return obj_;
+void Monitor::SetObject(mirror::Object* object) {
+  obj_ = object;
 }
 
 void Monitor::Lock(Thread* self) {
-  if (owner_ == self) {
-    lock_count_++;
-    return;
-  }
-
-  if (!monitor_lock_.TryLock(self)) {
-    uint64_t waitStart = 0;
-    uint64_t waitEnd = 0;
-    uint32_t wait_threshold = lock_profiling_threshold_;
-    const mirror::ArtMethod* current_locking_method = NULL;
-    uint32_t current_locking_dex_pc = 0;
+  MutexLock mu(self, monitor_lock_);
+  while (true) {
+    if (owner_ == NULL) {  // Unowned.
+      owner_ = self;
+      CHECK_EQ(lock_count_, 0);
+      // When lock profiling is enabled, save the current monitor holder so that
+      // future acquisition failures can report it in sampled logging.
+      if (lock_profiling_threshold_ != 0) {
+        locking_method_ = self->GetCurrentMethod(&locking_dex_pc_);
+      }
+      return;
+    } else if (owner_ == self) {  // Recursive.
+      lock_count_++;
+      return;
+    }
+    // Contended.
+    const bool log_contention = (lock_profiling_threshold_ != 0);
+    uint64_t wait_start_ms = log_contention ? MilliTime() : 0;
+    const mirror::ArtMethod* owners_method = locking_method_;
+    uint32_t owners_dex_pc = locking_dex_pc_;
+    monitor_lock_.Unlock(self);  // Let go of locks in order.
     {
-      ScopedThreadStateChange tsc(self, kBlocked);
-      if (wait_threshold != 0) {
-        waitStart = NanoTime() / 1000;
-      }
-      current_locking_method = locking_method_;
-      current_locking_dex_pc = locking_dex_pc_;
-
-      monitor_lock_.Lock(self);
-      if (wait_threshold != 0) {
-        waitEnd = NanoTime() / 1000;
+      ScopedThreadStateChange tsc(self, kBlocked);  // Change to blocked and give up mutator_lock_.
+      MutexLock mu2(self, monitor_lock_);  // Reacquire monitor_lock_ without mutator_lock_ for Wait.
+      if (owner_ != NULL) {  // Did the owner_ give the lock up?
+        monitor_contenders_.Wait(self);  // Still contended so wait.
+        // Woken from contention.
+        if (log_contention) {
+          uint64_t wait_ms = MilliTime() - wait_start_ms;
+          uint32_t sample_percent;
+          if (wait_ms >= lock_profiling_threshold_) {
+            sample_percent = 100;
+          } else {
+            sample_percent = 100 * wait_ms / lock_profiling_threshold_;
+          }
+          if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
+            const char* owners_filename;
+            uint32_t owners_line_number;
+            TranslateLocation(owners_method, owners_dex_pc, &owners_filename, &owners_line_number);
+            LogContentionEvent(self, wait_ms, sample_percent, owners_filename, owners_line_number);
+          }
+        }
       }
     }
-
-    if (wait_threshold != 0) {
-      uint64_t wait_ms = (waitEnd - waitStart) / 1000;
-      uint32_t sample_percent;
-      if (wait_ms >= wait_threshold) {
-        sample_percent = 100;
-      } else {
-        sample_percent = 100 * wait_ms / wait_threshold;
-      }
-      if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
-        const char* current_locking_filename;
-        uint32_t current_locking_line_number;
-        TranslateLocation(current_locking_method, current_locking_dex_pc,
-                          current_locking_filename, current_locking_line_number);
-        LogContentionEvent(self, wait_ms, sample_percent, current_locking_filename, current_locking_line_number);
-      }
-    }
-  }
-  owner_ = self;
-  DCHECK_EQ(lock_count_, 0);
-
-  // When debugging, save the current monitor holder for future
-  // acquisition failures to use in sampled logging.
-  if (lock_profiling_threshold_ != 0) {
-    locking_method_ = self->GetCurrentMethod(&locking_dex_pc_);
+    monitor_lock_.Lock(self);  // Reacquire locks in order.
   }
 }
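// The acquire loop above is the classic mutex + condition-variable monitor
// pattern. A minimal standard-C++ rendering, with the sampled contention
// logging and the mutator-lock ordering dance stripped out:
#include <condition_variable>
#include <mutex>

struct SketchMonitor {
  std::mutex monitor_lock;
  std::condition_variable contenders;
  const void* owner = nullptr;
  int lock_count = 0;

  void Lock(const void* self) {
    std::unique_lock<std::mutex> lk(monitor_lock);
    while (owner != nullptr && owner != self) {
      contenders.wait(lk);  // Block until Unlock() signals a contender.
    }
    if (owner == self) {
      ++lock_count;         // Recursive acquisition.
    } else {
      owner = self;         // Unowned: take it.
    }
  }

  void Unlock(const void* self) {
    std::lock_guard<std::mutex> lk(monitor_lock);
    if (owner != self) return;  // The real code throws IllegalMonitorStateException.
    if (lock_count > 0) {
      --lock_count;
    } else {
      owner = nullptr;
      contenders.notify_one();  // Wake one contender, like monitor_contenders_.Signal().
    }
  }
};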
 
@@ -257,11 +231,11 @@
   Thread* self = Thread::Current();
   ThrowLocation throw_location = self->GetCurrentLocationForThrow();
   self->ThrowNewExceptionV(throw_location, "Ljava/lang/IllegalMonitorStateException;", fmt, args);
-  if (!Runtime::Current()->IsStarted()) {
+  if (!Runtime::Current()->IsStarted() || VLOG_IS_ON(monitor)) {
     std::ostringstream ss;
     self->Dump(ss);
-    std::string str(ss.str());
-    LOG(ERROR) << "IllegalMonitorStateException: " << str;
+    LOG(Runtime::Current()->IsStarted() ? INFO : ERROR)
+        << self->GetException(NULL)->Dump() << "\n" << ss.str();
   }
   va_end(args);
 }
@@ -287,7 +261,7 @@
     // Acquire thread list lock so threads won't disappear from under us.
     MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
     // Re-read owner now that we hold lock.
-    current_owner = (monitor != NULL) ? monitor->owner_ : NULL;
+    current_owner = (monitor != NULL) ? monitor->GetOwner() : NULL;
     // Get short descriptions of the threads involved.
     current_owner_string = ThreadToString(current_owner);
     expected_owner_string = ThreadToString(expected_owner);
@@ -335,8 +309,9 @@
   }
 }
 
-bool Monitor::Unlock(Thread* self, bool for_wait) {
+bool Monitor::Unlock(Thread* self) {
   DCHECK(self != NULL);
+  MutexLock mu(self, monitor_lock_);
   Thread* owner = owner_;
   if (owner == self) {
     // We own the monitor, so nobody else can be in here.
@@ -344,17 +319,11 @@
       owner_ = NULL;
       locking_method_ = NULL;
       locking_dex_pc_ = 0;
-      monitor_lock_.Unlock(self);
+      // Wake a contender.
+      monitor_contenders_.Signal(self);
     } else {
       --lock_count_;
     }
-  } else if (for_wait) {
-    // Wait should have already cleared the fields.
-    DCHECK_EQ(lock_count_, 0);
-    DCHECK(owner == NULL);
-    DCHECK(locking_method_ == NULL);
-    DCHECK_EQ(locking_dex_pc_, 0u);
-    monitor_lock_.Unlock(self);
   } else {
     // We don't own this, so we're not allowed to unlock it.
     // The JNI spec says that we should throw IllegalMonitorStateException
@@ -393,12 +362,14 @@
   DCHECK(self != NULL);
   DCHECK(why == kTimedWaiting || why == kWaiting || why == kSleeping);
 
+  monitor_lock_.Lock(self);
+
   // Make sure that we hold the lock.
   if (owner_ != self) {
     ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
+    monitor_lock_.Unlock(self);
     return;
   }
-  monitor_lock_.AssertHeld(self);
 
   // We need to turn a zero-length timed wait into a regular wait because
   // Object.wait(0, 0) is defined as Object.wait(0), which is defined as Object.wait().
@@ -406,16 +377,12 @@
     why = kWaiting;
   }
 
-  WaitWithLock(self, ms, ns, interruptShouldThrow, why);
-}
-
-void Monitor::WaitWithLock(Thread* self, int64_t ms, int32_t ns,
-                           bool interruptShouldThrow, ThreadState why) {
   // Enforce the timeout range.
   if (ms < 0 || ns < 0 || ns > 999999) {
     ThrowLocation throw_location = self->GetCurrentLocationForThrow();
     self->ThrowNewExceptionF(throw_location, "Ljava/lang/IllegalArgumentException;",
                              "timeout arguments out of range: ms=%lld ns=%d", ms, ns);
+    monitor_lock_.Unlock(self);
     return;
   }
 
@@ -457,7 +424,8 @@
     self->wait_monitor_ = this;
 
     // Release the monitor lock.
-    Unlock(self, true);
+    monitor_contenders_.Signal(self);
+    monitor_lock_.Unlock(self);
 
     // Handle the case where the thread was interrupted before we called wait().
     if (self->interrupted_) {
@@ -490,9 +458,9 @@
     self->wait_monitor_ = NULL;
   }
 
-  // Re-acquire the monitor lock.
+  // Re-acquire the monitor and monitor_lock_.
   Lock(self);
-
+  monitor_lock_.Lock(self);
   self->wait_mutex_->AssertNotHeld(self);
 
   /*
@@ -524,20 +492,17 @@
       self->ThrowNewException(throw_location, "Ljava/lang/InterruptedException;", NULL);
     }
   }
+  monitor_lock_.Unlock(self);
 }
 
 void Monitor::Notify(Thread* self) {
   DCHECK(self != NULL);
+  MutexLock mu(self, monitor_lock_);
   // Make sure that we hold the lock.
   if (owner_ != self) {
     ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
     return;
   }
-  monitor_lock_.AssertHeld(self);
-  NotifyWithLock(self);
-}
-
-void Monitor::NotifyWithLock(Thread* self) {
   // Signal the first waiting thread in the wait set.
   while (wait_set_ != NULL) {
     Thread* thread = wait_set_;
@@ -555,16 +520,12 @@
 
 void Monitor::NotifyAll(Thread* self) {
   DCHECK(self != NULL);
+  MutexLock mu(self, monitor_lock_);
   // Make sure that we hold the lock.
   if (owner_ != self) {
     ThrowIllegalMonitorStateExceptionF("object not locked by thread before notifyAll()");
     return;
   }
-  monitor_lock_.AssertHeld(self);
-  NotifyAllWithLock();
-}
-
-void Monitor::NotifyAllWithLock() {
   // Signal all threads in the wait set.
   while (wait_set_ != NULL) {
     Thread* thread = wait_set_;
@@ -575,182 +536,130 @@
 }
 
 /*
- * Changes the shape of a monitor from thin to fat, preserving the
- * internal lock state. The calling thread must own the lock.
+ * Changes the shape of a monitor from thin to fat, preserving the internal lock state. The calling
+ * thread must own the lock or the owner must be suspended. There's a race with other threads
+ * inflating the lock, so the caller should re-read the lock word after this call.
  */
-void Monitor::Inflate(Thread* self, mirror::Object* obj) {
+void Monitor::Inflate(Thread* self, Thread* owner, mirror::Object* obj) {
   DCHECK(self != NULL);
+  DCHECK(owner != NULL);
   DCHECK(obj != NULL);
-  DCHECK_EQ(LW_SHAPE(*obj->GetRawLockWordAddress()), LW_SHAPE_THIN);
-  DCHECK_EQ(LW_LOCK_OWNER(*obj->GetRawLockWordAddress()), static_cast<int32_t>(self->GetThinLockId()));
 
   // Allocate and acquire a new monitor.
-  Monitor* m = new Monitor(self, obj);
-  VLOG(monitor) << "monitor: thread " << self->GetThinLockId()
-                << " created monitor " << m << " for object " << obj;
-  Runtime::Current()->GetMonitorList()->Add(m);
+  UniquePtr<Monitor> m(new Monitor(owner, obj));
+  if (m->Install(self)) {
+    VLOG(monitor) << "monitor: thread " << owner->GetThreadId()
+                  << " created monitor " << m.get() << " for object " << obj;
+    Runtime::Current()->GetMonitorList()->Add(m.release());
+  }
+  CHECK_EQ(obj->GetLockWord().GetState(), LockWord::kFatLocked);
 }
 
 void Monitor::MonitorEnter(Thread* self, mirror::Object* obj) {
-  volatile int32_t* thinp = obj->GetRawLockWordAddress();
-  uint32_t sleepDelayNs;
-  uint32_t minSleepDelayNs = 1000000;  /* 1 millisecond */
-  uint32_t maxSleepDelayNs = 1000000000;  /* 1 second */
-  uint32_t thin, newThin;
-
   DCHECK(self != NULL);
   DCHECK(obj != NULL);
-  uint32_t threadId = self->GetThinLockId();
- retry:
-  thin = *thinp;
-  if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
-    /*
-     * The lock is a thin lock.  The owner field is used to
-     * determine the acquire method, ordered by cost.
-     */
-    if (LW_LOCK_OWNER(thin) == threadId) {
-      /*
-       * The calling thread owns the lock.  Increment the
-       * value of the recursion count field.
-       */
-      *thinp += 1 << LW_LOCK_COUNT_SHIFT;
-      if (LW_LOCK_COUNT(*thinp) == LW_LOCK_COUNT_MASK) {
-        /*
-         * The reacquisition limit has been reached.  Inflate
-         * the lock so the next acquire will not overflow the
-         * recursion count field.
-         */
-        Inflate(self, obj);
+  uint32_t thread_id = self->GetThreadId();
+  size_t contention_count = 0;
+
+  while (true) {
+    LockWord lock_word = obj->GetLockWord();
+    switch (lock_word.GetState()) {
+      case LockWord::kUnlocked: {
+        LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0));
+        if (obj->CasLockWord(lock_word, thin_locked)) {
+          return;  // Success!
+        }
+        continue;  // Go again.
       }
-    } else if (LW_LOCK_OWNER(thin) == 0) {
-      // The lock is unowned. Install the thread id of the calling thread into the owner field.
-      // This is the common case: compiled code will have tried this before calling back into
-      // the runtime.
-      newThin = thin | (threadId << LW_LOCK_OWNER_SHIFT);
-      if (android_atomic_acquire_cas(thin, newThin, thinp) != 0) {
-        // The acquire failed. Try again.
-        goto retry;
-      }
-    } else {
-      VLOG(monitor) << StringPrintf("monitor: thread %d spin on lock %p (a %s) owned by %d",
-                                    threadId, thinp, PrettyTypeOf(obj).c_str(), LW_LOCK_OWNER(thin));
-      // The lock is owned by another thread. Notify the runtime that we are about to wait.
-      self->monitor_enter_object_ = obj;
-      self->TransitionFromRunnableToSuspended(kBlocked);
-      // Spin until the thin lock is released or inflated.
-      sleepDelayNs = 0;
-      for (;;) {
-        thin = *thinp;
-        // Check the shape of the lock word. Another thread
-        // may have inflated the lock while we were waiting.
-        if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
-          if (LW_LOCK_OWNER(thin) == 0) {
-            // The lock has been released. Install the thread id of the
-            // calling thread into the owner field.
-            newThin = thin | (threadId << LW_LOCK_OWNER_SHIFT);
-            if (android_atomic_acquire_cas(thin, newThin, thinp) == 0) {
-              // The acquire succeed. Break out of the loop and proceed to inflate the lock.
-              break;
-            }
+      case LockWord::kThinLocked: {
+        uint32_t owner_thread_id = lock_word.ThinLockOwner();
+        if (owner_thread_id == thread_id) {
+          // We own the lock, increase the recursion count.
+          uint32_t new_count = lock_word.ThinLockCount() + 1;
+          if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) {
+            LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count));
+            obj->SetLockWord(thin_locked);
+            return;  // Success!
           } else {
-            // The lock has not been released. Yield so the owning thread can run.
-            if (sleepDelayNs == 0) {
-              sched_yield();
-              sleepDelayNs = minSleepDelayNs;
-            } else {
-              NanoSleep(sleepDelayNs);
-              // Prepare the next delay value. Wrap to avoid once a second polls for eternity.
-              if (sleepDelayNs < maxSleepDelayNs / 2) {
-                sleepDelayNs *= 2;
-              } else {
-                sleepDelayNs = minSleepDelayNs;
+            // We'd overflow the recursion count, so inflate the monitor.
+            Inflate(self, self, obj);
+          }
+        } else {
+          // Contention.
+          contention_count++;
+          if (contention_count <= Runtime::Current()->GetMaxSpinsBeforeThinLockInflation()) {
+            NanoSleep(1000);  // Sleep for 1us and re-attempt.
+          } else {
+            contention_count = 0;
+            // Suspend the owner, inflate. First change to blocked and give up mutator_lock_.
+            ScopedThreadStateChange tsc(self, kBlocked);
+            bool timed_out;
+            ThreadList* thread_list = Runtime::Current()->GetThreadList();
+            if (lock_word == obj->GetLockWord()) {  // If lock word hasn't changed.
+              Thread* owner = thread_list->SuspendThreadByThreadId(lock_word.ThinLockOwner(), false,
+                                                                   &timed_out);
+              if (owner != NULL) {
+                // We succeeded in suspending the thread, check that the lock's status didn't change.
+                lock_word = obj->GetLockWord();
+                if (lock_word.GetState() == LockWord::kThinLocked &&
+                    lock_word.ThinLockOwner() == owner_thread_id) {
+                  // Go ahead and inflate the lock.
+                  Inflate(self, owner, obj);
+                }
+                thread_list->Resume(owner, false);
               }
             }
           }
-        } else {
-          // The thin lock was inflated by another thread. Let the runtime know we are no longer
-          // waiting and try again.
-          VLOG(monitor) << StringPrintf("monitor: thread %d found lock %p surprise-fattened by another thread", threadId, thinp);
-          self->monitor_enter_object_ = NULL;
-          self->TransitionFromSuspendedToRunnable();
-          goto retry;
         }
+        continue;  // Start from the beginning.
       }
-      VLOG(monitor) << StringPrintf("monitor: thread %d spin on lock %p done", threadId, thinp);
-      // We have acquired the thin lock. Let the runtime know that we are no longer waiting.
-      self->monitor_enter_object_ = NULL;
-      self->TransitionFromSuspendedToRunnable();
-      // Fatten the lock.
-      Inflate(self, obj);
-      VLOG(monitor) << StringPrintf("monitor: thread %d fattened lock %p", threadId, thinp);
+      case LockWord::kFatLocked: {
+        Monitor* mon = lock_word.FatLockMonitor();
+        mon->Lock(self);
+        return;  // Success!
+      }
     }
-  } else {
-    // The lock is a fat lock.
-    VLOG(monitor) << StringPrintf("monitor: thread %d locking fat lock %p (%p) %p on a %s",
-                                  threadId, thinp, LW_MONITOR(*thinp),
-                                  reinterpret_cast<void*>(*thinp), PrettyTypeOf(obj).c_str());
-    DCHECK(LW_MONITOR(*thinp) != NULL);
-    LW_MONITOR(*thinp)->Lock(self);
   }
 }
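// Shape of the contended thin-lock path above: poll cheaply first, then pay
// for suspension and inflation. With the default budget of
// kDefaultMaxSpinsBeforeThinLockInflation (50) and 1us sleeps, a contender
// polls for roughly 50us before escalating. Sketch of just that decision:
#include <stddef.h>

bool ShouldInflateNow(size_t* contention_count, size_t max_spins) {
  ++*contention_count;
  if (*contention_count <= max_spins) {
    return false;  // Keep spinning: NanoSleep(1000) and re-read the lock word.
  }
  *contention_count = 0;  // Reset, matching the loop above.
  return true;  // Suspend the owner and inflate.
}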
 
 bool Monitor::MonitorExit(Thread* self, mirror::Object* obj) {
-  volatile int32_t* thinp = obj->GetRawLockWordAddress();
-
   DCHECK(self != NULL);
-  // DCHECK_EQ(self->GetState(), kRunnable);
   DCHECK(obj != NULL);
 
-  /*
-   * Cache the lock word as its value can change while we are
-   * examining its state.
-   */
-  uint32_t thin = *thinp;
-  if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
-    /*
-     * The lock is thin.  We must ensure that the lock is owned
-     * by the given thread before unlocking it.
-     */
-    if (LW_LOCK_OWNER(thin) == self->GetThinLockId()) {
-      /*
-       * We are the lock owner.  It is safe to update the lock
-       * without CAS as lock ownership guards the lock itself.
-       */
-      if (LW_LOCK_COUNT(thin) == 0) {
-        /*
-         * The lock was not recursively acquired, the common
-         * case.  Unlock by clearing all bits except for the
-         * hash state.
-         */
-        thin &= (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT);
-        android_atomic_release_store(thin, thinp);
-      } else {
-        /*
-         * The object was recursively acquired.  Decrement the
-         * lock recursion count field.
-         */
-        *thinp -= 1 << LW_LOCK_COUNT_SHIFT;
-      }
-    } else {
-      /*
-       * We do not own the lock.  The JVM spec requires that we
-       * throw an exception in this case.
-       */
+  LockWord lock_word = obj->GetLockWord();
+  switch (lock_word.GetState()) {
+    case LockWord::kUnlocked:
       FailedUnlock(obj, self, NULL, NULL);
-      return false;
+      return false;  // Failure.
+    case LockWord::kThinLocked: {
+      uint32_t thread_id = self->GetThreadId();
+      uint32_t owner_thread_id = lock_word.ThinLockOwner();
+      if (owner_thread_id != thread_id) {
+        // TODO: there's a race here with the owner dying while we unlock.
+        Thread* owner =
+            Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner());
+        FailedUnlock(obj, self, owner, NULL);
+        return false;  // Failure.
+      } else {
+        // We own the lock, decrease the recursion count.
+        if (lock_word.ThinLockCount() != 0) {
+          uint32_t new_count = lock_word.ThinLockCount() - 1;
+          LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count));
+          obj->SetLockWord(thin_locked);
+        } else {
+          obj->SetLockWord(LockWord());
+        }
+        return true;  // Success!
+      }
     }
-  } else {
-    /*
-     * The lock is fat.  We must check to see if Unlock has
-     * raised any exceptions before continuing.
-     */
-    DCHECK(LW_MONITOR(*thinp) != NULL);
-    if (!LW_MONITOR(*thinp)->Unlock(self, false)) {
-      // An exception has been raised.  Do not fall through.
-      return false;
+    case LockWord::kFatLocked: {
+      Monitor* mon = lock_word.FatLockMonitor();
+      return mon->Unlock(self);
     }
+    default:
+      LOG(FATAL) << "Unreachable";
+      return false;
   }
-  return true;
 }
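// Worked example of the thin-lock recursion arithmetic above: a thread that
// has entered the same object three times holds a thin lock with count 2
// (the first acquisition is implicit in ownership). Plain stores suffice on
// this path because only the owning thread mutates a thin lock it holds.
#include <assert.h>
#include <stdint.h>

void ThinLockExitExample() {
  uint32_t count = 2;    // Entered three times by one thread.
  bool locked = true;
  for (int exits = 0; exits < 3; ++exits) {
    if (count != 0) {
      --count;           // Store a thin lock word with count - 1.
    } else {
      locked = false;    // Store LockWord(): fully unlocked.
    }
  }
  assert(!locked && count == 0);
}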
 
 /*
@@ -758,84 +667,91 @@
  */
 void Monitor::Wait(Thread* self, mirror::Object *obj, int64_t ms, int32_t ns,
                    bool interruptShouldThrow, ThreadState why) {
-  volatile int32_t* thinp = obj->GetRawLockWordAddress();
+  DCHECK(self != NULL);
+  DCHECK(obj != NULL);
 
-  // If the lock is still thin, we need to fatten it.
-  uint32_t thin = *thinp;
-  if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
-    // Make sure that 'self' holds the lock.
-    if (LW_LOCK_OWNER(thin) != self->GetThinLockId()) {
+  LockWord lock_word = obj->GetLockWord();
+  switch (lock_word.GetState()) {
+    case LockWord::kUnlocked:
       ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
-      return;
+      return;  // Failure.
+    case LockWord::kThinLocked: {
+      uint32_t thread_id = self->GetThreadId();
+      uint32_t owner_thread_id = lock_word.ThinLockOwner();
+      if (owner_thread_id != thread_id) {
+        ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
+        return;  // Failure.
+      } else {
+        // We own the lock, inflate to enqueue ourself on the Monitor.
+        Inflate(self, self, obj);
+        lock_word = obj->GetLockWord();
+      }
+      break;
     }
-
-    /* This thread holds the lock.  We need to fatten the lock
-     * so 'self' can block on it.  Don't update the object lock
-     * field yet, because 'self' needs to acquire the lock before
-     * any other thread gets a chance.
-     */
-    Inflate(self, obj);
-    VLOG(monitor) << StringPrintf("monitor: thread %d fattened lock %p by wait()", self->GetThinLockId(), thinp);
+    case LockWord::kFatLocked:
+      break;  // Already inflated and ready for the wait.
   }
-  LW_MONITOR(*thinp)->Wait(self, ms, ns, interruptShouldThrow, why);
+  Monitor* mon = lock_word.FatLockMonitor();
+  mon->Wait(self, ms, ns, interruptShouldThrow, why);
 }
 
-void Monitor::Notify(Thread* self, mirror::Object *obj) {
-  uint32_t thin = *obj->GetRawLockWordAddress();
+void Monitor::DoNotify(Thread* self, mirror::Object* obj, bool notify_all) {
+  DCHECK(self != NULL);
+  DCHECK(obj != NULL);
 
-  // If the lock is still thin, there aren't any waiters;
-  // waiting on an object forces lock fattening.
-  if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
-    // Make sure that 'self' holds the lock.
-    if (LW_LOCK_OWNER(thin) != self->GetThinLockId()) {
+  LockWord lock_word = obj->GetLockWord();
+  switch (lock_word.GetState()) {
+    case LockWord::kUnlocked:
       ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
-      return;
+      return;  // Failure.
+    case LockWord::kThinLocked: {
+      uint32_t thread_id = self->GetThreadId();
+      uint32_t owner_thread_id = lock_word.ThinLockOwner();
+      if (owner_thread_id != thread_id) {
+        ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
+        return;  // Failure.
+      } else {
+        // We own the lock but there's no Monitor and therefore no waiters.
+        return;  // Success.
+      }
     }
-    // no-op;  there are no waiters to notify.
-    // We inflate here in case the Notify is in a tight loop. Without inflation here the waiter
-    // will struggle to get in. Bug 6961405.
-    Inflate(self, obj);
-  } else {
-    // It's a fat lock.
-    LW_MONITOR(thin)->Notify(self);
+    case LockWord::kFatLocked: {
+      Monitor* mon = lock_word.FatLockMonitor();
+      if (notify_all) {
+        mon->NotifyAll(self);
+      } else {
+        mon->Notify(self);
+      }
+      return;  // Success.
+    }
   }
 }
 
-void Monitor::NotifyAll(Thread* self, mirror::Object *obj) {
-  uint32_t thin = *obj->GetRawLockWordAddress();
+uint32_t Monitor::GetLockOwnerThreadId(mirror::Object* obj) {
+  DCHECK(obj != NULL);
 
-  // If the lock is still thin, there aren't any waiters;
-  // waiting on an object forces lock fattening.
-  if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
-    // Make sure that 'self' holds the lock.
-    if (LW_LOCK_OWNER(thin) != self->GetThinLockId()) {
-      ThrowIllegalMonitorStateExceptionF("object not locked by thread before notifyAll()");
-      return;
+  LockWord lock_word = obj->GetLockWord();
+  switch (lock_word.GetState()) {
+    case LockWord::kUnlocked:
+      return ThreadList::kInvalidThreadId;
+    case LockWord::kThinLocked:
+      return lock_word.ThinLockOwner();
+    case LockWord::kFatLocked: {
+      Monitor* mon = lock_word.FatLockMonitor();
+      return mon->GetOwnerThreadId();
     }
-    // no-op;  there are no waiters to notify.
-    // We inflate here in case the NotifyAll is in a tight loop. Without inflation here the waiter
-    // will struggle to get in. Bug 6961405.
-    Inflate(self, obj);
-  } else {
-    // It's a fat lock.
-    LW_MONITOR(thin)->NotifyAll(self);
-  }
-}
-
-uint32_t Monitor::GetThinLockId(uint32_t raw_lock_word) {
-  if (LW_SHAPE(raw_lock_word) == LW_SHAPE_THIN) {
-    return LW_LOCK_OWNER(raw_lock_word);
-  } else {
-    Thread* owner = LW_MONITOR(raw_lock_word)->owner_;
-    return owner ? owner->GetThinLockId() : 0;
+    default:
+      LOG(FATAL) << "Unreachable";
+      return ThreadList::kInvalidThreadId;
   }
 }
 
 void Monitor::DescribeWait(std::ostream& os, const Thread* thread) {
   ThreadState state = thread->GetState();
 
-  mirror::Object* object = NULL;
-  uint32_t lock_owner = ThreadList::kInvalidId;
+  int32_t object_identity_hashcode = 0;
+  uint32_t lock_owner = ThreadList::kInvalidThreadId;
+  std::string pretty_type;
   if (state == kWaiting || state == kTimedWaiting || state == kSleeping) {
     if (state == kSleeping) {
       os << "  - sleeping on ";
@@ -847,14 +763,18 @@
       MutexLock mu(self, *thread->wait_mutex_);
       Monitor* monitor = thread->wait_monitor_;
       if (monitor != NULL) {
-        object = monitor->obj_;
+        mirror::Object* object = monitor->obj_;
+        object_identity_hashcode = object->IdentityHashCode();
+        pretty_type = PrettyTypeOf(object);
       }
     }
   } else if (state == kBlocked) {
     os << "  - waiting to lock ";
-    object = thread->monitor_enter_object_;
+    mirror::Object* object = thread->monitor_enter_object_;
     if (object != NULL) {
-      lock_owner = object->GetThinLockId();
+      object_identity_hashcode = object->IdentityHashCode();
+      lock_owner = object->GetLockOwnerThreadId();
+      pretty_type = PrettyTypeOf(object);
     }
   } else {
     // We're not waiting on anything.
@@ -862,10 +782,10 @@
   }
 
   // - waiting on <0x6008c468> (a java.lang.Class<java.lang.ref.ReferenceQueue>)
-  os << "<" << object << "> (a " << PrettyTypeOf(object) << ")";
+  os << StringPrintf("<0x%08x> (a %s)", object_identity_hashcode, pretty_type.c_str());
 
   // - waiting to lock <0x613f83d8> (a java.lang.Object) held by thread 5
-  if (lock_owner != ThreadList::kInvalidId) {
+  if (lock_owner != ThreadList::kInvalidThreadId) {
     os << " held by thread " << lock_owner;
   }
 
@@ -876,18 +796,15 @@
   // This is used to implement JDWP's ThreadReference.CurrentContendedMonitor, and has a bizarre
   // definition of contended that includes a monitor a thread is trying to enter...
   mirror::Object* result = thread->monitor_enter_object_;
-  if (result != NULL) {
-    return result;
-  }
-  // ...but also a monitor that the thread is waiting on.
-  {
+  if (result == NULL) {
+    // ...but also a monitor that the thread is waiting on.
     MutexLock mu(Thread::Current(), *thread->wait_mutex_);
     Monitor* monitor = thread->wait_monitor_;
     if (monitor != NULL) {
-      return monitor->obj_;
+      result = monitor->GetObject();
     }
   }
-  return NULL;
+  return result;
 }
 
 void Monitor::VisitLocks(StackVisitor* stack_visitor, void (*callback)(mirror::Object*, void*),
@@ -952,41 +869,56 @@
   }
 }
 
-bool Monitor::IsValidLockWord(int32_t lock_word) {
-  if (lock_word == 0) {
-    return true;
-  } else if (LW_SHAPE(lock_word) == LW_SHAPE_FAT) {
-    Monitor* mon = LW_MONITOR(lock_word);
-    MonitorList* list = Runtime::Current()->GetMonitorList();
-    MutexLock mu(Thread::Current(), list->monitor_list_lock_);
-    bool found = false;
-    for (Monitor* list_mon : list->list_) {
-      if (mon == list_mon) {
-        found = true;
-        break;
+bool Monitor::IsValidLockWord(LockWord lock_word) {
+  switch (lock_word.GetState()) {
+    case LockWord::kUnlocked:
+      // Nothing to check.
+      return true;
+    case LockWord::kThinLocked:
+      // Basic sanity check of owner.
+      return lock_word.ThinLockOwner() != ThreadList::kInvalidThreadId;
+    case LockWord::kFatLocked: {
+      // Check that the monitor appears in the monitor list.
+      Monitor* mon = lock_word.FatLockMonitor();
+      MonitorList* list = Runtime::Current()->GetMonitorList();
+      MutexLock mu(Thread::Current(), list->monitor_list_lock_);
+      for (Monitor* list_mon : list->list_) {
+        if (mon == list_mon) {
+          return true;  // Found our monitor.
+        }
       }
+      return false;  // Fail - monitor is not in the runtime's monitor list.
     }
-    return found;
-  } else {
-    // TODO: thin lock validity checking.
-    return LW_SHAPE(lock_word) == LW_SHAPE_THIN;
+    default:
+      LOG(FATAL) << "Unreachable";
+      return false;
   }
 }
 
 void Monitor::TranslateLocation(const mirror::ArtMethod* method, uint32_t dex_pc,
-                                const char*& source_file, uint32_t& line_number) const {
+                                const char** source_file, uint32_t* line_number) const {
   // If method is null, location is unknown
   if (method == NULL) {
-    source_file = "";
-    line_number = 0;
+    *source_file = "";
+    *line_number = 0;
     return;
   }
   MethodHelper mh(method);
-  source_file = mh.GetDeclaringClassSourceFile();
-  if (source_file == NULL) {
-    source_file = "";
+  *source_file = mh.GetDeclaringClassSourceFile();
+  if (*source_file == NULL) {
+    *source_file = "";
   }
-  line_number = mh.GetLineNumFromDexPC(dex_pc);
+  *line_number = mh.GetLineNumFromDexPC(dex_pc);
+}
+
+uint32_t Monitor::GetOwnerThreadId() {
+  MutexLock mu(Thread::Current(), monitor_lock_);
+  Thread* owner = owner_;
+  if (owner != NULL) {
+    return owner->GetThreadId();
+  } else {
+    return ThreadList::kInvalidThreadId;
+  }
 }
 
 MonitorList::MonitorList()
@@ -1020,36 +952,44 @@
   list_.push_front(m);
 }
 
-void MonitorList::SweepMonitorList(IsMarkedTester is_marked, void* arg) {
+void MonitorList::SweepMonitorList(RootVisitor visitor, void* arg) {
   MutexLock mu(Thread::Current(), monitor_list_lock_);
   for (auto it = list_.begin(); it != list_.end(); ) {
     Monitor* m = *it;
-    if (!is_marked(m->GetObject(), arg)) {
-      VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object " << m->GetObject();
+    mirror::Object* obj = m->GetObject();
+    mirror::Object* new_obj = visitor(obj, arg);
+    if (new_obj == nullptr) {
+      VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object "
+                    << m->GetObject();
       delete m;
       it = list_.erase(it);
     } else {
+      m->SetObject(new_obj);
       ++it;
     }
   }
 }
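// The sweep contract above, in isolation: the visitor returns nullptr for an
// unmarked object, or the (possibly relocated) object otherwise, in which
// case the monitor must be retargeted via SetObject(). Generic sketch:
#include <list>

template <typename Mon, typename Obj>
void SweepSketch(std::list<Mon*>& monitors, Obj* (*visitor)(Obj*, void*), void* arg) {
  for (auto it = monitors.begin(); it != monitors.end();) {
    Obj* new_obj = visitor((*it)->GetObject(), arg);
    if (new_obj == nullptr) {
      delete *it;                   // Object died: its monitor dies too.
      it = monitors.erase(it);
    } else {
      (*it)->SetObject(new_obj);    // Object may have moved: repoint.
      ++it;
    }
  }
}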
 
-MonitorInfo::MonitorInfo(mirror::Object* o) : owner(NULL), entry_count(0) {
-  uint32_t lock_word = *o->GetRawLockWordAddress();
-  if (LW_SHAPE(lock_word) == LW_SHAPE_THIN) {
-    uint32_t owner_thin_lock_id = LW_LOCK_OWNER(lock_word);
-    if (owner_thin_lock_id != 0) {
-      owner = Runtime::Current()->GetThreadList()->FindThreadByThinLockId(owner_thin_lock_id);
-      entry_count = 1 + LW_LOCK_COUNT(lock_word);
-    }
-    // Thin locks have no waiters.
-  } else {
-    CHECK_EQ(LW_SHAPE(lock_word), LW_SHAPE_FAT);
-    Monitor* monitor = LW_MONITOR(lock_word);
-    owner = monitor->owner_;
-    entry_count = 1 + monitor->lock_count_;
-    for (Thread* waiter = monitor->wait_set_; waiter != NULL; waiter = waiter->wait_next_) {
-      waiters.push_back(waiter);
+MonitorInfo::MonitorInfo(mirror::Object* obj) : owner_(NULL), entry_count_(0) {
+  DCHECK(obj != NULL);
+
+  LockWord lock_word = obj->GetLockWord();
+  switch (lock_word.GetState()) {
+    case LockWord::kUnlocked:
+      break;
+    case LockWord::kThinLocked:
+      owner_ = Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner());
+      entry_count_ = 1 + lock_word.ThinLockCount();
+      // Thin locks have no waiters.
+      break;
+    case LockWord::kFatLocked: {
+      Monitor* mon = lock_word.FatLockMonitor();
+      owner_ = mon->owner_;
+      entry_count_ = 1 + mon->lock_count_;
+      for (Thread* waiter = mon->wait_set_; waiter != NULL; waiter = waiter->wait_next_) {
+        waiters_.push_back(waiter);
+      }
+      break;
     }
   }
 }
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 0b5b7e5..27124a2 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -30,47 +30,28 @@
 
 namespace art {
 
-/*
- * Monitor shape field. Used to distinguish thin locks from fat locks.
- */
-#define LW_SHAPE_THIN 0
-#define LW_SHAPE_FAT 1
-
-/*
- * Hash state field.  Used to signify that an object has had its
- * identity hash code exposed or relocated.
- */
-#define LW_HASH_STATE_UNHASHED 0
-#define LW_HASH_STATE_HASHED 1
-#define LW_HASH_STATE_HASHED_AND_MOVED 3
-#define LW_HASH_STATE_MASK 0x3
-#define LW_HASH_STATE_SHIFT 1
-#define LW_HASH_STATE(x) (((x) >> LW_HASH_STATE_SHIFT) & LW_HASH_STATE_MASK)
-
-/*
- * Lock owner field.  Contains the thread id of the thread currently
- * holding the lock.
- */
-#define LW_LOCK_OWNER_MASK 0xffff
-#define LW_LOCK_OWNER_SHIFT 3
-#define LW_LOCK_OWNER(x) (((x) >> LW_LOCK_OWNER_SHIFT) & LW_LOCK_OWNER_MASK)
-
 namespace mirror {
   class ArtMethod;
   class Object;
 }  // namespace mirror
+class LockWord;
 class Thread;
 class StackVisitor;
 
 class Monitor {
  public:
+  // The default number of spins that are done before thread suspension is used to forcibly inflate
+  // a lock word. See Runtime::max_spins_before_thin_lock_inflation_.
+  constexpr static size_t kDefaultMaxSpinsBeforeThinLockInflation = 50;
+
   ~Monitor();
 
   static bool IsSensitiveThread();
   static void Init(uint32_t lock_profiling_threshold, bool (*is_sensitive_thread_hook)());
 
-  static uint32_t GetThinLockId(uint32_t raw_lock_word)
-      NO_THREAD_SAFETY_ANALYSIS;  // Reading lock owner without holding lock is racy.
+  // Return the thread id of the lock owner or 0 when there is no owner.
+  static uint32_t GetLockOwnerThreadId(mirror::Object* obj)
+      NO_THREAD_SAFETY_ANALYSIS;  // TODO: Reading lock owner without holding lock is racy.
 
   static void MonitorEnter(Thread* thread, mirror::Object* obj)
       EXCLUSIVE_LOCK_FUNCTION(monitor_lock_)
@@ -80,9 +61,13 @@
       UNLOCK_FUNCTION(monitor_lock_);
 
   static void Notify(Thread* self, mirror::Object* obj)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DoNotify(self, obj, false);
+  }
   static void NotifyAll(Thread* self, mirror::Object* obj)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DoNotify(self, obj, true);
+  }
   static void Wait(Thread* self, mirror::Object* obj, int64_t ms, int32_t ns,
                    bool interruptShouldThrow, ThreadState why)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -92,7 +77,8 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Used to implement JDWP's ThreadReference.CurrentContendedMonitor.
-  static mirror::Object* GetContendedMonitor(Thread* thread);
+  static mirror::Object* GetContendedMonitor(Thread* thread)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Calls 'callback' once for each lock held in the single stack frame represented by
   // the current state of 'stack_visitor'.
@@ -100,18 +86,33 @@
                          void* callback_context)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static bool IsValidLockWord(int32_t lock_word);
+  static bool IsValidLockWord(LockWord lock_word);
 
-  mirror::Object* GetObject();
+  // TODO: SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+  mirror::Object* GetObject() const {
+    return obj_;
+  }
+
+  void SetObject(mirror::Object* object);
+
+  Thread* GetOwner() const NO_THREAD_SAFETY_ANALYSIS {
+    return owner_;
+  }
 
  private:
   explicit Monitor(Thread* owner, mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Install the monitor into its object, may fail if another thread installs a different monitor
+  // first.
+  bool Install(Thread* self)
+      LOCKS_EXCLUDED(monitor_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   void AppendToWaitSet(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_);
   void RemoveFromWaitSet(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_);
 
-  static void Inflate(Thread* self, mirror::Object* obj)
+  static void Inflate(Thread* self, Thread* owner, mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void LogContentionEvent(Thread* self, uint32_t wait_ms, uint32_t sample_percent,
@@ -122,44 +123,50 @@
       LOCKS_EXCLUDED(Locks::thread_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void Lock(Thread* self) EXCLUSIVE_LOCK_FUNCTION(monitor_lock_);
-  bool Unlock(Thread* thread, bool for_wait) UNLOCK_FUNCTION(monitor_lock_);
-
-  void Notify(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
-  void NotifyWithLock(Thread* self)
-      EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_)
+  void Lock(Thread* self)
+      LOCKS_EXCLUDED(monitor_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool Unlock(Thread* thread)
+      LOCKS_EXCLUDED(monitor_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void NotifyAll(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
-  void NotifyAllWithLock()
-      EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_)
+  static void DoNotify(Thread* self, mirror::Object* obj, bool notify_all)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void Notify(Thread* self)
+      LOCKS_EXCLUDED(monitor_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void NotifyAll(Thread* self)
+      LOCKS_EXCLUDED(monitor_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 
   void Wait(Thread* self, int64_t msec, int32_t nsec, bool interruptShouldThrow, ThreadState why)
-      NO_THREAD_SAFETY_ANALYSIS;
-  void WaitWithLock(Thread* self, int64_t ms, int32_t ns, bool interruptShouldThrow, ThreadState why)
-      EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_)
+      LOCKS_EXCLUDED(monitor_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Translates the provided method and pc into its declaring class' source file and line number.
   void TranslateLocation(const mirror::ArtMethod* method, uint32_t pc,
-                         const char*& source_file, uint32_t& line_number) const
+                         const char** source_file, uint32_t* line_number) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  uint32_t GetOwnerThreadId();
+
   static bool (*is_sensitive_thread_hook_)();
   static uint32_t lock_profiling_threshold_;
 
   Mutex monitor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  ConditionVariable monitor_contenders_ GUARDED_BY(monitor_lock_);
 
   // Which thread currently owns the lock?
-  Thread* volatile owner_;
+  Thread* volatile owner_ GUARDED_BY(monitor_lock_);
 
   // Owner's recursive lock depth.
   int lock_count_ GUARDED_BY(monitor_lock_);
 
-  // What object are we part of (for debugging).
-  mirror::Object* const obj_;
+  // What object are we part of.
+  mirror::Object* obj_;
 
   // Threads currently waiting on this monitor.
   Thread* wait_set_ GUARDED_BY(monitor_lock_);
@@ -182,10 +189,11 @@
   ~MonitorList();
 
   void Add(Monitor* m);
-  void SweepMonitorList(IsMarkedTester is_marked, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  void SweepMonitorList(RootVisitor visitor, void* arg);
   void DisallowNewMonitors();
   void AllowNewMonitors();
+
  private:
   bool allow_new_monitors_ GUARDED_BY(monitor_list_lock_);
   Mutex monitor_list_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -203,9 +211,9 @@
  public:
   explicit MonitorInfo(mirror::Object* o) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  Thread* owner;
-  size_t entry_count;
-  std::vector<Thread*> waiters;
+  Thread* owner_;
+  size_t entry_count_;
+  std::vector<Thread*> waiters_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(MonitorInfo);
diff --git a/runtime/monitor_android.cc b/runtime/monitor_android.cc
index 8efa072..d89290b 100644
--- a/runtime/monitor_android.cc
+++ b/runtime/monitor_android.cc
@@ -81,7 +81,7 @@
   mirror::ArtMethod* m = self->GetCurrentMethod(&pc);
   const char* filename;
   uint32_t line_number;
-  TranslateLocation(m, pc, filename, line_number);
+  TranslateLocation(m, pc, &filename, &line_number);
   cp = EventLogWriteString(cp, filename, strlen(filename));
 
   // Emit the source code line number, 5 bytes.
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 5fc8bd5..dad6eff 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -169,10 +169,7 @@
 
   // Trim the managed heap.
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  gc::space::DlMallocSpace* alloc_space = heap->GetAllocSpace();
-  size_t alloc_space_size = alloc_space->Size();
-  float managed_utilization =
-      static_cast<float>(alloc_space->GetBytesAllocated()) / alloc_space_size;
+  float managed_utilization = static_cast<float>(heap->GetBytesAllocated()) / heap->GetTotalMemory();
   size_t managed_reclaimed = heap->Trim();
 
   uint64_t gc_heap_end_ns = NanoTime();
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index eaf67b8..5508270 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -34,7 +34,7 @@
   }
   // Suspend thread to build stack trace.
   bool timed_out;
-  Thread* thread = Thread::SuspendForDebugger(peer, true, &timed_out);
+  Thread* thread = ThreadList::SuspendThreadByPeer(peer, true, false, &timed_out);
   if (thread != NULL) {
     jobject trace;
     {
@@ -42,7 +42,7 @@
       trace = thread->CreateInternalStackTrace(soa);
     }
     // Restart suspended thread.
-    Runtime::Current()->GetThreadList()->Resume(thread, true);
+    Runtime::Current()->GetThreadList()->Resume(thread, false);
     return trace;
   } else {
     if (timed_out) {
diff --git a/runtime/native/java_lang_DexCache.cc b/runtime/native/java_lang_DexCache.cc
index f8eeb29..9b83206 100644
--- a/runtime/native/java_lang_DexCache.cc
+++ b/runtime/native/java_lang_DexCache.cc
@@ -26,7 +26,7 @@
   ScopedObjectAccess soa(env);
   mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
   // Should only be called while holding the lock on the dex cache.
-  DCHECK_EQ(dex_cache->GetThinLockId(), soa.Self()->GetThinLockId());
+  DCHECK_EQ(dex_cache->GetLockOwnerThreadId(), soa.Self()->GetThreadId());
   const DexFile* dex_file = dex_cache->GetDexFile();
   if (dex_file == NULL) {
     return NULL;
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index 30b4dc7..100f5a9 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -316,6 +316,28 @@
   }
 }
 
+static void System_arraycopyCharUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos, jobject javaDst, jint dstPos, jint length) {
+  ScopedObjectAccess soa(env);
+  DCHECK(javaSrc != NULL);
+  DCHECK(javaDst != NULL);
+  mirror::Object* srcObject = soa.Decode<mirror::Object*>(javaSrc);
+  mirror::Object* dstObject = soa.Decode<mirror::Object*>(javaDst);
+  DCHECK(srcObject->IsArrayInstance());
+  DCHECK(dstObject->IsArrayInstance());
+  mirror::Array* srcArray = srcObject->AsArray();
+  mirror::Array* dstArray = dstObject->AsArray();
+  DCHECK(srcPos >= 0 && dstPos >= 0 && length >= 0 &&
+         srcPos + length <= srcArray->GetLength() && dstPos + length <= dstArray->GetLength());
+  DCHECK_EQ(srcArray->GetClass()->GetComponentType(), dstArray->GetClass()->GetComponentType());
+  DCHECK(srcArray->GetClass()->GetComponentType()->IsPrimitive());
+  DCHECK(dstArray->GetClass()->GetComponentType()->IsPrimitive());
+  DCHECK_EQ(srcArray->GetClass()->GetComponentSize(), static_cast<size_t>(2));
+  DCHECK_EQ(dstArray->GetClass()->GetComponentSize(), static_cast<size_t>(2));
+  uint8_t* dstBytes = reinterpret_cast<uint8_t*>(dstArray->GetRawData(2));
+  const uint8_t* srcBytes = reinterpret_cast<const uint8_t*>(srcArray->GetRawData(2));
+  move16(dstBytes + dstPos * 2, srcBytes + srcPos * 2, length * 2);
+}
+
 static jint System_identityHashCode(JNIEnv* env, jclass, jobject javaObject) {
   ScopedObjectAccess soa(env);
   mirror::Object* o = soa.Decode<mirror::Object*>(javaObject);
@@ -324,6 +346,7 @@
 
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(System, arraycopy, "(Ljava/lang/Object;ILjava/lang/Object;II)V"),
+  NATIVE_METHOD(System, arraycopyCharUnchecked, "([CI[CII)V"),
   NATIVE_METHOD(System, identityHashCode, "(Ljava/lang/Object;)I"),
 };
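// For reference, the JNI descriptor "([CI[CII)V" registered above decodes,
// per the standard JNI type grammar, as:
//   [C  char[]  (source array)
//   I   int     (srcPos)
//   [C  char[]  (destination array)
//   I   int     (dstPos)
//   I   int     (length)
//   V   void    (return type)
// which matches System_arraycopyCharUnchecked's parameter list exactly.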
 
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index e85ef09..a9de086 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -122,13 +122,13 @@
   // thread list lock to avoid this, as setting the thread name causes mutator to lock/unlock
   // in the DDMS send code.
   bool timed_out;
-  Thread* thread = Thread::SuspendForDebugger(peer, true, &timed_out);
+  Thread* thread = ThreadList::SuspendThreadByPeer(peer, true, false, &timed_out);
   if (thread != NULL) {
     {
       ScopedObjectAccess soa(env);
       thread->SetThreadName(name.c_str());
     }
-    Runtime::Current()->GetThreadList()->Resume(thread, true);
+    Runtime::Current()->GetThreadList()->Resume(thread, false);
   } else if (timed_out) {
     LOG(ERROR) << "Trying to set thread name to '" << name.c_str() << "' failed as the thread "
         "failed to suspend within a generous timeout.";
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index 0676968..4f81a0b 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -44,22 +44,10 @@
  * NULL on failure, e.g. if the threadId couldn't be found.
  */
 static jobjectArray DdmVmInternal_getStackTraceById(JNIEnv* env, jclass, jint thin_lock_id) {
-  ScopedLocalRef<jobject> peer(env, NULL);
-  {
-    Thread* t = Runtime::Current()->GetThreadList()->FindThreadByThinLockId(thin_lock_id);
-    if (t == NULL) {
-      return NULL;
-    }
-    ScopedObjectAccess soa(env);
-    peer.reset(soa.AddLocalReference<jobject>(t->GetPeer()));
-  }
-  if (peer.get() == NULL) {
-    return NULL;
-  }
-
   // Suspend thread to build stack trace.
+  ThreadList* thread_list = Runtime::Current()->GetThreadList();
   bool timed_out;
-  Thread* thread = Thread::SuspendForDebugger(peer.get(), true, &timed_out);
+  Thread* thread = thread_list->SuspendThreadByThreadId(thin_lock_id, false, &timed_out);
   if (thread != NULL) {
     jobject trace;
     {
@@ -67,7 +55,7 @@
       trace = thread->CreateInternalStackTrace(soa);
     }
     // Restart suspended thread.
-    Runtime::Current()->GetThreadList()->Resume(thread, true);
+    thread_list->Resume(thread, false);
     return Thread::InternalStackTraceToStackTraceElementArray(env, trace);
   } else {
     if (timed_out) {
@@ -115,7 +103,7 @@
   GetTaskStats(t->GetTid(), &native_thread_state, &utime, &stime, &task_cpu);
 
   std::vector<uint8_t>& bytes = *reinterpret_cast<std::vector<uint8_t>*>(context);
-  JDWP::Append4BE(bytes, t->GetThinLockId());
+  JDWP::Append4BE(bytes, t->GetThreadId());
   JDWP::Append1BE(bytes, Dbg::ToJdwpThreadStatus(t->GetState()));
   JDWP::Append4BE(bytes, t->GetTid());
   JDWP::Append4BE(bytes, utime);
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index 6ee3016..8062a89 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -36,7 +36,8 @@
 
 class ObjectLock {
  public:
-  explicit ObjectLock(Thread* self, mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+  explicit ObjectLock(Thread* self, mirror::Object* object)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : self_(self), obj_(object) {
     CHECK(object != NULL);
     obj_->MonitorEnter(self_);
@@ -111,6 +112,17 @@
     }
   }
 
+  StringPiece GetDescriptorAsStringPiece() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    CHECK(klass_ != NULL);
+    if (UNLIKELY(klass_->IsArrayClass() || klass_->IsPrimitive() || klass_->IsProxyClass())) {
+      return StringPiece(GetDescriptor());
+    } else {
+      const DexFile& dex_file = GetDexFile();
+      const DexFile::TypeId& type_id = dex_file.GetTypeId(GetClassDef()->class_idx_);
+      return dex_file.StringDataAsStringPieceByIdx(type_id.descriptor_idx_);
+    }
+  }
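// The dex file records each string's length, so returning a StringPiece lets
// callers compare and hash descriptors without the strlen() that a plain
// const char* forces on them, and without building a std::string. Minimal
// sketch, assuming only that StringPiece wraps a pointer plus a length:
bool DescriptorMatches(const StringPiece& descriptor, const char* expected) {
  return descriptor == StringPiece(expected);  // Length-aware, no allocation.
}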
+
   const char* GetArrayDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string result("[");
     const mirror::Class* saved_klass = klass_;
@@ -182,7 +194,7 @@
   }
 
   const char* GetSourceFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    std::string descriptor(GetDescriptor());
+    std::string descriptor(GetDescriptorAsStringPiece().as_string());
     const DexFile& dex_file = GetDexFile();
     const DexFile::ClassDef* dex_class_def = GetClassDef();
     CHECK(dex_class_def != NULL);
@@ -267,53 +279,77 @@
     }
     field_ = new_f;
   }
+
   const char* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t field_index = field_->GetDexFieldIndex();
-    if (!field_->GetDeclaringClass()->IsProxyClass()) {
-      const DexFile& dex_file = GetDexFile();
-      return dex_file.GetFieldName(dex_file.GetFieldId(field_index));
-    } else {
+    if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
       DCHECK(field_->IsStatic());
       DCHECK_LT(field_index, 2U);
       return field_index == 0 ? "interfaces" : "throws";
     }
+    const DexFile& dex_file = GetDexFile();
+    return dex_file.GetFieldName(dex_file.GetFieldId(field_index));
   }
+
+  StringPiece GetNameAsStringPiece() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uint32_t field_index = field_->GetDexFieldIndex();
+    if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
+      return StringPiece(GetName());
+    }
+    const DexFile& dex_file = GetDexFile();
+    const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
+    return dex_file.StringDataAsStringPieceByIdx(field_id.name_idx_);
+  }
+
   mirror::Class* GetType(bool resolve = true) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t field_index = field_->GetDexFieldIndex();
-    if (!field_->GetDeclaringClass()->IsProxyClass()) {
-      const DexFile& dex_file = GetDexFile();
-      const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
-      mirror::Class* type = GetDexCache()->GetResolvedType(field_id.type_idx_);
-      if (resolve && (type == NULL)) {
-        type = GetClassLinker()->ResolveType(field_id.type_idx_, field_);
-        CHECK(type != NULL || Thread::Current()->IsExceptionPending());
-      }
-      return type;
-    } else {
+    if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
       return GetClassLinker()->FindSystemClass(GetTypeDescriptor());
     }
+    const DexFile& dex_file = GetDexFile();
+    const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
+    mirror::Class* type = GetDexCache()->GetResolvedType(field_id.type_idx_);
+    if (resolve && (type == NULL)) {
+      type = GetClassLinker()->ResolveType(field_id.type_idx_, field_);
+      CHECK(type != NULL || Thread::Current()->IsExceptionPending());
+    }
+    return type;
   }
+
   const char* GetTypeDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t field_index = field_->GetDexFieldIndex();
-    if (!field_->GetDeclaringClass()->IsProxyClass()) {
-      const DexFile& dex_file = GetDexFile();
-      const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
-      return dex_file.GetFieldTypeDescriptor(field_id);
-    } else {
+    if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
       DCHECK(field_->IsStatic());
       DCHECK_LT(field_index, 2U);
       // 0 == Class[] interfaces; 1 == Class[][] throws;
       return field_index == 0 ? "[Ljava/lang/Class;" : "[[Ljava/lang/Class;";
     }
+    const DexFile& dex_file = GetDexFile();
+    const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
+    return dex_file.GetFieldTypeDescriptor(field_id);
   }
+
+  StringPiece GetTypeDescriptorAsStringPiece() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uint32_t field_index = field_->GetDexFieldIndex();
+    if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
+      return StringPiece(GetTypeDescriptor());
+    }
+    const DexFile& dex_file = GetDexFile();
+    const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
+    const DexFile::TypeId& type_id = dex_file.GetTypeId(field_id.type_idx_);
+    return dex_file.StringDataAsStringPieceByIdx(type_id.descriptor_idx_);
+  }
+
   Primitive::Type GetTypeAsPrimitiveType()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return Primitive::GetType(GetTypeDescriptor()[0]);
   }
+
   bool IsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Primitive::Type type = GetTypeAsPrimitiveType();
     return type != Primitive::kPrimNot;
   }
+
   size_t FieldSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Primitive::Type type = GetTypeAsPrimitiveType();
     return Primitive::FieldSize(type);
@@ -324,18 +360,17 @@
   const char* GetDeclaringClassDescriptor()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t field_index = field_->GetDexFieldIndex();
-    if (!field_->GetDeclaringClass()->IsProxyClass()) {
-      const DexFile& dex_file = GetDexFile();
-      const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
-      return dex_file.GetFieldDeclaringClassDescriptor(field_id);
-    } else {
+    if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
       DCHECK(field_->IsStatic());
       DCHECK_LT(field_index, 2U);
       // 0 == Class[] interfaces; 1 == Class[][] throws;
       ClassHelper kh(field_->GetDeclaringClass());
-      declaring_class_descriptor_ = kh.GetDescriptor();
+      declaring_class_descriptor_ = kh.GetDescriptorAsStringPiece().as_string();
       return declaring_class_descriptor_.c_str();
     }
+    const DexFile& dex_file = GetDexFile();
+    const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
+    return dex_file.GetFieldDeclaringClassDescriptor(field_id);
   }
 
  private:
@@ -417,7 +452,7 @@
   const char* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     uint32_t dex_method_idx = method_->GetDexMethodIndex();
-    if (dex_method_idx != DexFile::kDexNoIndex) {
+    if (LIKELY(dex_method_idx != DexFile::kDexNoIndex)) {
       return dex_file.GetMethodName(dex_file.GetMethodId(dex_method_idx));
     } else {
       Runtime* runtime = Runtime::Current();
@@ -435,6 +470,16 @@
     }
   }
 
+  StringPiece GetNameAsStringPiece() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    const DexFile& dex_file = GetDexFile();
+    uint32_t dex_method_idx = method_->GetDexMethodIndex();
+    if (UNLIKELY(dex_method_idx == DexFile::kDexNoIndex)) {
+      return StringPiece(GetName());
+    }
+    const DexFile::MethodId& method_id = dex_file.GetMethodId(dex_method_idx);
+    return dex_file.StringDataAsStringPieceByIdx(method_id.name_idx_);
+  }
+
   mirror::String* GetNameAsString() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     uint32_t dex_method_idx = method_->GetDexMethodIndex();
@@ -460,13 +505,13 @@
     return shorty_len_;
   }
 
-  const std::string GetSignature() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const Signature GetSignature() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     uint32_t dex_method_idx = method_->GetDexMethodIndex();
     if (dex_method_idx != DexFile::kDexNoIndex) {
       return dex_file.GetMethodSignature(dex_file.GetMethodId(dex_method_idx));
     } else {
-      return "<no signature>";
+      return Signature::NoSignature();
     }
   }
 
@@ -508,11 +553,22 @@
   const char* GetDeclaringClassDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     uint32_t dex_method_idx = method_->GetDexMethodIndex();
-    if (dex_method_idx != DexFile::kDexNoIndex) {
-      return dex_file.GetMethodDeclaringClassDescriptor(dex_file.GetMethodId(dex_method_idx));
-    } else {
+    if (UNLIKELY(dex_method_idx == DexFile::kDexNoIndex)) {
       return "<runtime method>";
     }
+    return dex_file.GetMethodDeclaringClassDescriptor(dex_file.GetMethodId(dex_method_idx));
+  }
+
+  StringPiece GetDeclaringClassDescriptorAsStringPiece()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    const DexFile& dex_file = GetDexFile();
+    uint32_t dex_method_idx = method_->GetDexMethodIndex();
+    if (UNLIKELY(dex_method_idx == DexFile::kDexNoIndex)) {
+      return StringPiece("<runtime method>");
+    }
+    const DexFile::MethodId& mid = dex_file.GetMethodId(dex_method_idx);
+    const DexFile::TypeId& type_id = dex_file.GetTypeId(mid.class_idx_);
+    return dex_file.StringDataAsStringPieceByIdx(type_id.descriptor_idx_);
   }
 
   const char* GetDeclaringClassSourceFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -536,7 +592,7 @@
   }
 
   bool IsClassInitializer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return IsStatic() && StringPiece(GetName()) == "<clinit>";
+    return method_->IsConstructor() && IsStatic();
   }
 
   size_t NumArgs() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -569,16 +625,21 @@
 
   bool HasSameNameAndSignature(MethodHelper* other)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    const DexFile& dex_file = GetDexFile();
+    const DexFile::MethodId& mid = dex_file.GetMethodId(method_->GetDexMethodIndex());
     if (GetDexCache() == other->GetDexCache()) {
-      const DexFile& dex_file = GetDexFile();
-      const DexFile::MethodId& mid = dex_file.GetMethodId(method_->GetDexMethodIndex());
       const DexFile::MethodId& other_mid =
           dex_file.GetMethodId(other->method_->GetDexMethodIndex());
       return mid.name_idx_ == other_mid.name_idx_ && mid.proto_idx_ == other_mid.proto_idx_;
     }
-    StringPiece name(GetName());
-    StringPiece other_name(other->GetName());
-    return name == other_name && GetSignature() == other->GetSignature();
+    const DexFile& other_dex_file = other->GetDexFile();
+    const DexFile::MethodId& other_mid =
+        other_dex_file.GetMethodId(other->method_->GetDexMethodIndex());
+    if (dex_file.StringDataAsStringPieceByIdx(mid.name_idx_) !=
+        other_dex_file.StringDataAsStringPieceByIdx(other_mid.name_idx_)) {
+      return false;  // Name mismatch.
+    }
+    return dex_file.GetMethodSignature(mid) == other_dex_file.GetMethodSignature(other_mid);
   }
 
   const DexFile::CodeItem* GetCodeItem()
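The rewrite of HasSameNameAndSignature above keeps two tiers: when both methods share a dex cache, comparing name_idx_ and proto_idx_ suffices because a dex file stores each string and each proto exactly once; across dex files, names are compared first since a name mismatch is the cheap, common rejection. A sketch of the same-dex-file property (illustrative only):

    // Within one dex file, string_ids and proto_ids are canonical, so index
    // equality is equivalent to comparing the underlying strings/signatures.
    bool SameNameAndSignatureInOneDexFile(const DexFile::MethodId& a,
                                          const DexFile::MethodId& b) {
      return a.name_idx_ == b.name_idx_ && a.proto_idx_ == b.proto_idx_;
    }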
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index 8e23cbb..e95fdb9 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -38,16 +38,16 @@
 ReferenceTable::~ReferenceTable() {
 }
 
-void ReferenceTable::Add(const mirror::Object* obj) {
+void ReferenceTable::Add(mirror::Object* obj) {
   DCHECK(obj != NULL);
-  if (entries_.size() == max_size_) {
+  if (entries_.size() >= max_size_) {
     LOG(FATAL) << "ReferenceTable '" << name_ << "' "
                << "overflowed (" << max_size_ << " entries)";
   }
   entries_.push_back(obj);
 }
 
-void ReferenceTable::Remove(const mirror::Object* obj) {
+void ReferenceTable::Remove(mirror::Object* obj) {
   // We iterate backwards on the assumption that references are LIFO.
   for (int i = entries_.size() - 1; i >= 0; --i) {
     if (entries_[i] == obj) {
@@ -232,8 +232,8 @@
 }
 
 void ReferenceTable::VisitRoots(RootVisitor* visitor, void* arg) {
-  for (const auto& ref : entries_) {
-    visitor(ref, arg);
+  for (auto& ref : entries_) {
+    ref = visitor(const_cast<mirror::Object*>(ref), arg);
   }
 }
 
diff --git a/runtime/reference_table.h b/runtime/reference_table.h
index e369fd0..37b3172 100644
--- a/runtime/reference_table.h
+++ b/runtime/reference_table.h
@@ -39,9 +39,9 @@
   ReferenceTable(const char* name, size_t initial_size, size_t max_size);
   ~ReferenceTable();
 
-  void Add(const mirror::Object* obj);
+  void Add(mirror::Object* obj);
 
-  void Remove(const mirror::Object* obj);
+  void Remove(mirror::Object* obj);
 
   size_t Size() const;
 
@@ -50,7 +50,7 @@
   void VisitRoots(RootVisitor* visitor, void* arg);
 
  private:
-  typedef std::vector<const mirror::Object*> Table;
+  typedef std::vector<mirror::Object*> Table;
   static void Dump(std::ostream& os, const Table& entries)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   friend class IndirectReferenceTable;  // For Dump.
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 3e58b4b..4ff7349 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -323,7 +323,7 @@
   }
 
   JValue boxed_value;
-  std::string src_descriptor(ClassHelper(o->GetClass()).GetDescriptor());
+  const StringPiece src_descriptor(ClassHelper(o->GetClass()).GetDescriptorAsStringPiece());
   mirror::Class* src_class = NULL;
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   mirror::ArtField* primitive_field = o->GetClass()->GetIFields()->Get(0);
@@ -356,7 +356,7 @@
                                   StringPrintf("%s has type %s, got %s",
                                                UnboxingFailureKind(m, index, f).c_str(),
                                                PrettyDescriptor(dst_class).c_str(),
-                                               PrettyDescriptor(src_descriptor.c_str()).c_str()).c_str());
+                                               PrettyDescriptor(src_descriptor.data()).c_str()).c_str());
     return false;
   }
 
diff --git a/runtime/root_visitor.h b/runtime/root_visitor.h
index 3aa9b4b..a2d898b 100644
--- a/runtime/root_visitor.h
+++ b/runtime/root_visitor.h
@@ -23,7 +23,8 @@
 }  // namespace mirror
 class StackVisitor;
 
-typedef void (RootVisitor)(const mirror::Object* root, void* arg);
+typedef mirror::Object* (RootVisitor)(mirror::Object* root, void* arg)
+    __attribute__((warn_unused_result));
 typedef void (VerifyRootVisitor)(const mirror::Object* root, void* arg, size_t vreg,
                                  const StackVisitor* visitor);
 typedef bool (IsMarkedTester)(const mirror::Object* object, void* arg);
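This typedef change is the heart of the patch: a RootVisitor now returns the root's possibly new address so a moving collector can update roots in place, and warn_unused_result flags any caller that drops the returned pointer. A minimal sketch of a conforming visitor, assuming a hypothetical forwarding-table helper (not an API from this patch):

    // Hypothetical table mapping objects to their post-compaction addresses;
    // GetForwardingAddress() is assumed to return its input if the object did not move.
    struct ForwardingTable {
      mirror::Object* GetForwardingAddress(mirror::Object* obj);
    };

    // A conforming visitor: callers store the result back into the root slot.
    static mirror::Object* ForwardingVisitor(mirror::Object* root, void* arg) {
      return reinterpret_cast<ForwardingTable*>(arg)->GetForwardingAddress(root);
    }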
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index b9f54e5..bdedef4 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -75,6 +75,7 @@
       is_explicit_gc_disabled_(false),
       default_stack_size_(0),
       heap_(NULL),
+      max_spins_before_thin_lock_inflation_(Monitor::kDefaultMaxSpinsBeforeThinLockInflation),
       monitor_list_(NULL),
       thread_list_(NULL),
       intern_table_(NULL),
@@ -99,7 +100,8 @@
       use_compile_time_class_path_(false),
       main_thread_group_(NULL),
       system_thread_group_(NULL),
-      system_class_loader_(NULL) {
+      system_class_loader_(NULL),
+      quick_alloc_entry_points_instrumentation_counter_(0) {
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
     callee_save_methods_[i] = NULL;
   }
@@ -319,6 +321,12 @@
   return result;
 }
 
+void Runtime::SweepSystemWeaks(RootVisitor* visitor, void* arg) {
+  GetInternTable()->SweepInternTableWeaks(visitor, arg);
+  GetMonitorList()->SweepMonitorList(visitor, arg);
+  GetJavaVM()->SweepJniWeakGlobals(visitor, arg);
+}
+
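SweepSystemWeaks applies one callback to the three system-weak tables (interned strings, monitors, JNI weak globals). Unlike ordinary root visiting, the callback may return nullptr, which tells the owning table to delete the entry. A minimal sketch, with IsMarked() standing in for the collector's mark query (an assumption, not an API introduced here):

    bool IsMarked(const mirror::Object* obj);  // Stand-in for the collector's query.

    static mirror::Object* SweepWeakCallback(mirror::Object* obj, void* arg) {
      if (!IsMarked(obj)) {
        return nullptr;  // The owning table deletes this weak entry.
      }
      return obj;  // A moving collector would return the forwarded address here.
    }

A collector would then call Runtime::Current()->SweepSystemWeaks(SweepWeakCallback, nullptr) after marking.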
 Runtime::ParsedOptions* Runtime::ParsedOptions::Create(const Options& options, bool ignore_unrecognized) {
   UniquePtr<ParsedOptions> parsed(new ParsedOptions());
   const char* boot_class_path_string = getenv("BOOTCLASSPATH");
@@ -343,6 +351,7 @@
   // Only the main GC thread, no workers.
   parsed->conc_gc_threads_ = 0;
   parsed->stack_size_ = 0;  // 0 means default.
+  parsed->max_spins_before_thin_lock_inflation_ = Monitor::kDefaultMaxSpinsBeforeThinLockInflation;
   parsed->low_memory_mode_ = false;
 
   parsed->is_compiler_ = false;
@@ -503,6 +512,10 @@
         return NULL;
       }
       parsed->stack_size_ = size;
+    } else if (StartsWith(option, "-XX:MaxSpinsBeforeThinLockInflation=")) {
+      parsed->max_spins_before_thin_lock_inflation_ =
+          strtoul(option.substr(strlen("-XX:MaxSpinsBeforeThinLockInflation=")).c_str(),
+                  nullptr, 10);
     } else if (option == "-XX:LongPauseLogThreshold") {
       parsed->long_pause_log_threshold_ =
           ParseMemoryOption(option.substr(strlen("-XX:LongPauseLogThreshold=")).c_str(), 1024);
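For context, a hypothetical invocation of the new option parsed above (the value 100 is illustrative; the default is Monitor::kDefaultMaxSpinsBeforeThinLockInflation):

    dalvikvm -XX:MaxSpinsBeforeThinLockInflation=100 -cp classes.dex Main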
@@ -865,6 +878,8 @@
   default_stack_size_ = options->stack_size_;
   stack_trace_file_ = options->stack_trace_file_;
 
+  max_spins_before_thin_lock_inflation_ = options->max_spins_before_thin_lock_inflation_;
+
   monitor_list_ = new MonitorList;
   thread_list_ = new ThreadList;
   intern_table_ = new InternTable;
@@ -900,7 +915,7 @@
   // objects. We can't supply a thread group yet; it will be fixed later. Since we are the main
   // thread, we do not get a java peer.
   Thread* self = Thread::Attach("main", false, NULL, false);
-  CHECK_EQ(self->thin_lock_id_, ThreadList::kMainId);
+  CHECK_EQ(self->thin_lock_thread_id_, ThreadList::kMainThreadId);
   CHECK(self != NULL);
 
   // Set us to runnable so tools using a runtime can allocate and GC by default
@@ -1060,6 +1075,9 @@
     GetStats()->Clear(~0);
     // TODO: wouldn't it make more sense to clear _all_ threads' stats?
     Thread::Current()->GetStats()->Clear(~0);
+    InstrumentQuickAllocEntryPoints();
+  } else {
+    UninstrumentQuickAllocEntryPoints();
   }
   stats_enabled_ = new_state;
 }
@@ -1150,12 +1168,17 @@
 
 void Runtime::VisitNonThreadRoots(RootVisitor* visitor, void* arg) {
   java_vm_->VisitRoots(visitor, arg);
-  if (pre_allocated_OutOfMemoryError_ != NULL) {
-    visitor(pre_allocated_OutOfMemoryError_, arg);
+  if (pre_allocated_OutOfMemoryError_ != nullptr) {
+    pre_allocated_OutOfMemoryError_ = reinterpret_cast<mirror::Throwable*>(
+        visitor(pre_allocated_OutOfMemoryError_, arg));
+    DCHECK(pre_allocated_OutOfMemoryError_ != nullptr);
   }
-  visitor(resolution_method_, arg);
+  resolution_method_ = reinterpret_cast<mirror::ArtMethod*>(visitor(resolution_method_, arg));
+  DCHECK(resolution_method_ != nullptr);
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-    visitor(callee_save_methods_[i], arg);
+    callee_save_methods_[i] = reinterpret_cast<mirror::ArtMethod*>(
+        visitor(callee_save_methods_[i], arg));
+    DCHECK(callee_save_methods_[i] != nullptr);
   }
 }
 
@@ -1282,4 +1305,46 @@
   compile_time_class_paths_.Put(class_loader, class_path);
 }
 
+static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg) {
+  thread->ResetQuickAllocEntryPointsForThread();
+}
+
+void SetQuickAllocEntryPointsInstrumented(bool instrumented);
+
+void Runtime::InstrumentQuickAllocEntryPoints() {
+  ThreadList* tl = thread_list_;
+  Thread* self = Thread::Current();
+  tl->SuspendAll();
+  {
+    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+    MutexLock mu2(self, *Locks::thread_list_lock_);
+    DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_, 0);
+    int old_counter = quick_alloc_entry_points_instrumentation_counter_++;
+    if (old_counter == 0) {
+      // If it was disabled, enable it.
+      SetQuickAllocEntryPointsInstrumented(true);
+      tl->ForEach(ResetQuickAllocEntryPointsForThread, NULL);
+    }
+  }
+  tl->ResumeAll();
+}
+
+void Runtime::UninstrumentQuickAllocEntryPoints() {
+  ThreadList* tl = thread_list_;
+  Thread* self = Thread::Current();
+  tl->SuspendAll();
+  {
+    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+    MutexLock mu2(self, *Locks::thread_list_lock_);
+    DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_, 0);
+    int new_counter = --quick_alloc_entry_points_instrumentation_counter_;
+    if (new_counter == 0) {
+      // Disable it if the counter becomes zero.
+      SetQuickAllocEntryPointsInstrumented(false);
+      tl->ForEach(ResetQuickAllocEntryPointsForThread, NULL);
+    }
+  }
+  tl->ResumeAll();
+}
+
 }  // namespace art
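The counter added above turns the allocation entry-point switch into a reference-counted facility: the expensive SuspendAll plus per-thread reset runs only on the 0 -> 1 and 1 -> 0 transitions, so independent clients can nest enable/disable calls safely. A sketch of a scoped client, assuming only the two public methods this patch adds:

    // Hypothetical RAII helper: instrumented allocation entry points stay
    // active for the lifetime of the scope.
    class ScopedAllocInstrumentation {
     public:
      ScopedAllocInstrumentation() {
        Runtime::Current()->InstrumentQuickAllocEntryPoints();
      }
      ~ScopedAllocInstrumentation() {
        Runtime::Current()->UninstrumentQuickAllocEntryPoints();
      }
    };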
diff --git a/runtime/runtime.h b/runtime/runtime.h
index bc5c8b0..dd04ac7 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -112,6 +112,7 @@
     size_t parallel_gc_threads_;
     size_t conc_gc_threads_;
     size_t stack_size_;
+    size_t max_spins_before_thin_lock_inflation_;
     bool low_memory_mode_;
     size_t lock_profiling_threshold_;
     std::string stack_trace_file_;
@@ -286,6 +287,10 @@
     return java_vm_;
   }
 
+  size_t GetMaxSpinsBeforeThinLockInflation() const {
+    return max_spins_before_thin_lock_inflation_;
+  }
+
   MonitorList* GetMonitorList() const {
     return monitor_list_;
   }
@@ -323,6 +328,10 @@
   void VisitNonConcurrentRoots(RootVisitor* visitor, void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Sweep the system weaks: a system weak is deleted if the visitor returns nullptr; otherwise
+  // it is updated to the visitor's returned value.
+  void SweepSystemWeaks(RootVisitor* visitor, void* arg);
+
   // Returns a special method that calls into a trampoline for runtime method resolution
   mirror::ArtMethod* GetResolutionMethod() const {
     CHECK(HasResolutionMethod());
@@ -397,6 +406,9 @@
   const std::vector<const DexFile*>& GetCompileTimeClassPath(jobject class_loader);
   void SetCompileTimeClassPath(jobject class_loader, std::vector<const DexFile*>& class_path);
 
+  void InstrumentQuickAllocEntryPoints();
+  void UninstrumentQuickAllocEntryPoints();
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -451,6 +463,8 @@
 
   gc::Heap* heap_;
 
+  // The number of spins to attempt before thread suspension is used to forcibly inflate the lock.
+  size_t max_spins_before_thin_lock_inflation_;
   MonitorList* monitor_list_;
 
   ThreadList* thread_list_;
@@ -513,6 +527,8 @@
   // As returned by ClassLoader.getSystemClassLoader().
   jobject system_class_loader_;
 
+  int quick_alloc_entry_points_instrumentation_counter_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index 15eb27d..fe62e25 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -147,7 +147,6 @@
   CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
   if (self->ReadFlag(kCheckpointRequest)) {
     self->RunCheckpointFunction();
-    self->AtomicClearFlag(kCheckpointRequest);
   }
   self->EndAssertNoThreadSuspension(old_cause);
   thread_list->ResumeAll();
diff --git a/runtime/sirt_ref.h b/runtime/sirt_ref.h
index 81f0dff..a1f8a66 100644
--- a/runtime/sirt_ref.h
+++ b/runtime/sirt_ref.h
@@ -30,7 +30,8 @@
     self_->PushSirt(&sirt_);
   }
   ~SirtRef() {
-    CHECK(self_->PopSirt() == &sirt_);
+    StackIndirectReferenceTable* top_sirt = self_->PopSirt();
+    DCHECK_EQ(top_sirt, &sirt_);
   }
 
   T& operator*() const { return *get(); }
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 206bff3..5d3a9a5 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -148,8 +148,8 @@
       const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
       DCHECK(code_item != NULL) << PrettyMethod(m);  // Can't be NULL or how would we compile its instructions?
       size_t frame_size = m->GetFrameSizeInBytes();
-      return GetVReg(cur_quick_frame_, code_item, m->GetCoreSpillMask(), m->GetFpSpillMask(),
-                     frame_size, vreg);
+      return *GetVRegAddr(cur_quick_frame_, code_item, m->GetCoreSpillMask(), m->GetFpSpillMask(),
+                          frame_size, vreg);
     }
   } else {
     return cur_shadow_frame_->GetVReg(vreg);
@@ -253,7 +253,7 @@
   return result;
 }
 
-instrumentation::InstrumentationStackFrame StackVisitor::GetInstrumentationStackFrame(uint32_t depth) const {
+instrumentation::InstrumentationStackFrame& StackVisitor::GetInstrumentationStackFrame(uint32_t depth) const {
   return thread_->GetInstrumentationStack()->at(depth);
 }
 
@@ -309,7 +309,7 @@
           // While profiling, the return pc is restored from the side stack, except when walking
           // the stack for an exception where the side stack will be unwound in VisitFrame.
           if (GetQuickInstrumentationExitPc() == return_pc) {
-            instrumentation::InstrumentationStackFrame instrumentation_frame =
+            const instrumentation::InstrumentationStackFrame& instrumentation_frame =
                 GetInstrumentationStackFrame(instrumentation_stack_depth);
             instrumentation_stack_depth++;
             if (GetMethod() == Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveAll)) {
diff --git a/runtime/stack.h b/runtime/stack.h
index 7c87f45..a4b93bc 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -68,8 +68,7 @@
   static ShadowFrame* Create(uint32_t num_vregs, ShadowFrame* link,
                              mirror::ArtMethod* method, uint32_t dex_pc) {
     uint8_t* memory = new uint8_t[ComputeSize(num_vregs)];
-    ShadowFrame* sf = new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true);
-    return sf;
+    return Create(num_vregs, link, method, dex_pc, memory);
   }
 
   // Create ShadowFrame for interpreter using provided memory.
@@ -154,7 +153,12 @@
   mirror::Object* GetVRegReference(size_t i) const {
     DCHECK_LT(i, NumberOfVRegs());
     if (HasReferenceArray()) {
-      return References()[i];
+      mirror::Object* ref = References()[i];
+      // If the reference array entry no longer matches the raw vreg value, the reference is stale.
+      if (reinterpret_cast<uint32_t>(ref) != vregs_[i]) {
+        return nullptr;
+      }
+      return ref;
     } else {
       const uint32_t* vreg = &vregs_[i];
       return *reinterpret_cast<mirror::Object* const*>(vreg);
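The equality check above guards against stale entries: the primitive-store path updates only vregs_[i], leaving References()[i] pointing at the old object. A hypothetical sequence showing the guard at work (assumes shadow_frame and obj are in scope; slots are 32-bit, matching the cast above):

    shadow_frame->SetVRegReference(0, obj);  // vregs_[0] holds obj's bits; References()[0] == obj.
    shadow_frame->SetVReg(0, 42);            // vregs_[0] == 42; References()[0] is now stale.
    mirror::Object* ref = shadow_frame->GetVRegReference(0);  // nullptr, not the stale obj.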
@@ -467,13 +471,14 @@
   uintptr_t GetGPR(uint32_t reg) const;
   void SetGPR(uint32_t reg, uintptr_t value);
 
-  uint32_t GetVReg(mirror::ArtMethod** cur_quick_frame, const DexFile::CodeItem* code_item,
+  // This is a fast-path for getting/setting values in a quick frame.
+  uint32_t* GetVRegAddr(mirror::ArtMethod** cur_quick_frame, const DexFile::CodeItem* code_item,
                    uint32_t core_spills, uint32_t fp_spills, size_t frame_size,
                    uint16_t vreg) const {
     int offset = GetVRegOffset(code_item, core_spills, fp_spills, frame_size, vreg);
     DCHECK_EQ(cur_quick_frame, GetCurrentQuickFrame());
     byte* vreg_addr = reinterpret_cast<byte*>(cur_quick_frame) + offset;
-    return *reinterpret_cast<uint32_t*>(vreg_addr);
+    return reinterpret_cast<uint32_t*>(vreg_addr);
   }
 
   uintptr_t GetReturnPc() const;
@@ -562,7 +567,7 @@
   static void DescribeStack(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
-  instrumentation::InstrumentationStackFrame GetInstrumentationStackFrame(uint32_t depth) const;
+  instrumentation::InstrumentationStackFrame& GetInstrumentationStackFrame(uint32_t depth) const;
 
   void SanityCheckFrame() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index c22f2cd..7d28785 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -19,11 +19,24 @@
 
 #include "thread.h"
 
+#include <pthread.h>
+
 #include "base/mutex-inl.h"
 #include "cutils/atomic-inline.h"
 
 namespace art {
 
+inline Thread* Thread::Current() {
+  // We rely on Thread::Current returning NULL for a detached thread, so it's not obvious
+  // that we can replace this with a direct %fs access on x86.
+  if (!is_started_) {
+    return NULL;
+  } else {
+    void* thread = pthread_getspecific(Thread::pthread_key_self_);
+    return reinterpret_cast<Thread*>(thread);
+  }
+}
+
 inline ThreadState Thread::SetState(ThreadState new_state) {
   // Cannot use this code to change into Runnable as changing to Runnable should fail if
   // old_state_and_flags.suspend_request is true.
@@ -67,17 +80,16 @@
   union StateAndFlags new_state_and_flags;
   do {
     old_state_and_flags = state_and_flags_;
+    if (UNLIKELY((old_state_and_flags.as_struct.flags & kCheckpointRequest) != 0)) {
+      RunCheckpointFunction();
+      continue;
+    }
     // Copy over flags and try to clear the checkpoint bit if it is set.
     new_state_and_flags.as_struct.flags = old_state_and_flags.as_struct.flags & ~kCheckpointRequest;
     new_state_and_flags.as_struct.state = new_state;
     // CAS the value without a memory barrier, that will occur in the unlock below.
   } while (UNLIKELY(android_atomic_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
                                        &state_and_flags_.as_int) != 0));
-  // If we toggled the checkpoint flag we must have cleared it.
-  uint16_t flag_change = new_state_and_flags.as_struct.flags ^ old_state_and_flags.as_struct.flags;
-  if (UNLIKELY((flag_change & kCheckpointRequest) != 0)) {
-    RunCheckpointFunction();
-  }
   // Release share on mutator_lock_.
   Locks::mutator_lock_->SharedUnlock(this);
 }
diff --git a/runtime/thread.cc b/runtime/thread.cc
index e8326ea..3063658 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -108,6 +108,12 @@
                   &quick_entrypoints_);
 }
 
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
+
+void Thread::ResetQuickAllocEntryPointsForThread() {
+  ResetQuickAllocEntryPoints(&quick_entrypoints_);
+}
+
 void Thread::SetDeoptimizationShadowFrame(ShadowFrame* sf) {
   deoptimization_shadow_frame_ = sf;
 }
@@ -305,7 +311,7 @@
   CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, this), "attach self");
   DCHECK_EQ(Thread::Current(), this);
 
-  thin_lock_id_ = thread_list->AllocThreadId(this);
+  thin_lock_thread_id_ = thread_list->AllocThreadId(this);
   InitStackHwm();
 
   jni_env_ = new JNIEnvExt(this, java_vm);
@@ -470,9 +476,9 @@
 
 void Thread::ShortDump(std::ostream& os) const {
   os << "Thread[";
-  if (GetThinLockId() != 0) {
+  if (GetThreadId() != 0) {
     // If we're in kStarting, we won't have a thin lock id or tid yet.
-    os << GetThinLockId()
+    os << GetThreadId()
              << ",tid=" << GetTid() << ',';
   }
   os << GetState()
@@ -568,18 +574,32 @@
   ATRACE_BEGIN("Checkpoint function");
   checkpoint_function_->Run(this);
   ATRACE_END();
+  checkpoint_function_ = NULL;
+  AtomicClearFlag(kCheckpointRequest);
 }
 
 bool Thread::RequestCheckpoint(Closure* function) {
-  CHECK(!ReadFlag(kCheckpointRequest)) << "Already have a pending checkpoint request";
-  checkpoint_function_ = function;
   union StateAndFlags old_state_and_flags = state_and_flags_;
+  if (old_state_and_flags.as_struct.state != kRunnable) {
+    return false;  // Fail, thread is suspended and so can't run a checkpoint.
+  }
+  if ((old_state_and_flags.as_struct.flags & kCheckpointRequest) != 0) {
+    return false;  // Fail, already a checkpoint pending.
+  }
+  CHECK(checkpoint_function_ == NULL);
+  checkpoint_function_ = function;
+  // Checkpoint function installed; now install the flag bit.
   // We must be runnable to request a checkpoint.
   old_state_and_flags.as_struct.state = kRunnable;
   union StateAndFlags new_state_and_flags = old_state_and_flags;
   new_state_and_flags.as_struct.flags |= kCheckpointRequest;
   int succeeded = android_atomic_cmpxchg(old_state_and_flags.as_int, new_state_and_flags.as_int,
                                          &state_and_flags_.as_int);
+  if (UNLIKELY(succeeded != 0)) {
+    // The thread changed state before the checkpoint was installed.
+    CHECK(checkpoint_function_ == function);
+    checkpoint_function_ = NULL;
+  }
   return succeeded == 0;
 }
 
@@ -594,88 +614,6 @@
   VLOG(threads) << this << " self-reviving";
 }
 
-Thread* Thread::SuspendForDebugger(jobject peer, bool request_suspension, bool* timed_out) {
-  static const useconds_t kTimeoutUs = 30 * 1000000;  // 30s.
-  useconds_t total_delay_us = 0;
-  useconds_t delay_us = 0;
-  bool did_suspend_request = false;
-  *timed_out = false;
-  while (true) {
-    Thread* thread;
-    {
-      ScopedObjectAccess soa(Thread::Current());
-      Thread* self = soa.Self();
-      MutexLock mu(self, *Locks::thread_list_lock_);
-      thread = Thread::FromManagedThread(soa, peer);
-      if (thread == NULL) {
-        JNIEnv* env = self->GetJniEnv();
-        ScopedLocalRef<jstring> scoped_name_string(env,
-                                                   (jstring)env->GetObjectField(peer,
-                                                              WellKnownClasses::java_lang_Thread_name));
-        ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
-        if (scoped_name_chars.c_str() == NULL) {
-            LOG(WARNING) << "No such thread for suspend: " << peer;
-            env->ExceptionClear();
-        } else {
-            LOG(WARNING) << "No such thread for suspend: " << peer << ":" << scoped_name_chars.c_str();
-        }
-
-        return NULL;
-      }
-      {
-        MutexLock mu(soa.Self(), *Locks::thread_suspend_count_lock_);
-        if (request_suspension) {
-          thread->ModifySuspendCount(soa.Self(), +1, true /* for_debugger */);
-          request_suspension = false;
-          did_suspend_request = true;
-        }
-        // IsSuspended on the current thread will fail as the current thread is changed into
-        // Runnable above. As the suspend count is now raised if this is the current thread
-        // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
-        // to just explicitly handle the current thread in the callers to this code.
-        CHECK_NE(thread, soa.Self()) << "Attempt to suspend the current thread for the debugger";
-        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
-        // count, or else we've waited and it has self suspended) or is the current thread, we're
-        // done.
-        if (thread->IsSuspended()) {
-          return thread;
-        }
-        if (total_delay_us >= kTimeoutUs) {
-          LOG(ERROR) << "Thread suspension timed out: " << peer;
-          if (did_suspend_request) {
-            thread->ModifySuspendCount(soa.Self(), -1, true /* for_debugger */);
-          }
-          *timed_out = true;
-          return NULL;
-        }
-      }
-      // Release locks and come out of runnable state.
-    }
-    for (int i = kLockLevelCount - 1; i >= 0; --i) {
-      BaseMutex* held_mutex = Thread::Current()->GetHeldMutex(static_cast<LockLevel>(i));
-      if (held_mutex != NULL) {
-        LOG(FATAL) << "Holding " << held_mutex->GetName()
-            << " while sleeping for thread suspension";
-      }
-    }
-    {
-      useconds_t new_delay_us = delay_us * 2;
-      CHECK_GE(new_delay_us, delay_us);
-      if (new_delay_us < 500000) {  // Don't allow sleeping to be more than 0.5s.
-        delay_us = new_delay_us;
-      }
-    }
-    if (delay_us == 0) {
-      sched_yield();
-      // Default to 1 milliseconds (note that this gets multiplied by 2 before the first sleep).
-      delay_us = 500;
-    } else {
-      usleep(delay_us);
-      total_delay_us += delay_us;
-    }
-  }
-}
-
 void Thread::DumpState(std::ostream& os, const Thread* thread, pid_t tid) {
   std::string group_name;
   int priority;
@@ -712,7 +650,7 @@
       os << " daemon";
     }
     os << " prio=" << priority
-       << " tid=" << thread->GetThinLockId()
+       << " tid=" << thread->GetThreadId()
        << " " << thread->GetState();
     if (thread->IsStillStarting()) {
       os << " (still starting up)";
@@ -962,9 +900,9 @@
       jpeer_(NULL),
       stack_begin_(NULL),
       stack_size_(0),
+      thin_lock_thread_id_(0),
       stack_trace_sample_(NULL),
       trace_clock_base_(0),
-      thin_lock_id_(0),
       tid_(0),
       wait_mutex_(new Mutex("a thread wait mutex")),
       wait_cond_(new ConditionVariable("a thread wait condition variable", *wait_mutex_)),
@@ -1012,9 +950,10 @@
   }
 }
 
-static void MonitorExitVisitor(const mirror::Object* object, void* arg) NO_THREAD_SAFETY_ANALYSIS {
+static mirror::Object* MonitorExitVisitor(mirror::Object* object, void* arg)
+    NO_THREAD_SAFETY_ANALYSIS {
   Thread* self = reinterpret_cast<Thread*>(arg);
-  mirror::Object* entered_monitor = const_cast<mirror::Object*>(object);
+  mirror::Object* entered_monitor = object;
   if (self->HoldsLock(entered_monitor)) {
     LOG(WARNING) << "Calling MonitorExit on object "
                  << object << " (" << PrettyTypeOf(object) << ")"
@@ -1022,6 +961,7 @@
                  << *Thread::Current() << " which is detaching";
     entered_monitor->MonitorExit(self);
   }
+  return object;
 }
 
 void Thread::Destroy() {
@@ -1151,8 +1091,12 @@
     size_t num_refs = cur->NumberOfReferences();
     for (size_t j = 0; j < num_refs; j++) {
       mirror::Object* object = cur->GetReference(j);
-      if (object != NULL) {
-        visitor(object, arg);
+      if (object != nullptr) {
+        const mirror::Object* new_obj = visitor(object, arg);
+        DCHECK(new_obj != nullptr);
+        if (new_obj != object) {
+          cur->SetReference(j, const_cast<mirror::Object*>(new_obj));
+        }
       }
     }
   }
@@ -1381,24 +1325,23 @@
   // Transition into runnable state to work on Object*/Array*
   ScopedObjectAccess soa(env);
   // Decode the internal stack trace into the depth, method trace and PC trace
-  mirror::ObjectArray<mirror::Object>* method_trace =
-      soa.Decode<mirror::ObjectArray<mirror::Object>*>(internal);
-  int32_t depth = method_trace->GetLength() - 1;
-  mirror::IntArray* pc_trace = down_cast<mirror::IntArray*>(method_trace->Get(depth));
+  int32_t depth = soa.Decode<mirror::ObjectArray<mirror::Object>*>(internal)->GetLength() - 1;
 
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
   jobjectArray result;
-  mirror::ObjectArray<mirror::StackTraceElement>* java_traces;
+
   if (output_array != NULL) {
     // Reuse the array we were given.
     result = output_array;
-    java_traces = soa.Decode<mirror::ObjectArray<mirror::StackTraceElement>*>(output_array);
     // ...adjusting the number of frames we'll write to not exceed the array length.
-    depth = std::min(depth, java_traces->GetLength());
+    const int32_t traces_length =
+        soa.Decode<mirror::ObjectArray<mirror::StackTraceElement>*>(result)->GetLength();
+    depth = std::min(depth, traces_length);
   } else {
     // Create java_trace array and place in local reference table
-    java_traces = class_linker->AllocStackTraceElementArray(soa.Self(), depth);
+    mirror::ObjectArray<mirror::StackTraceElement>* java_traces =
+        class_linker->AllocStackTraceElementArray(soa.Self(), depth);
     if (java_traces == NULL) {
       return NULL;
     }
@@ -1411,9 +1354,12 @@
 
   MethodHelper mh;
   for (int32_t i = 0; i < depth; ++i) {
+    mirror::ObjectArray<mirror::Object>* method_trace =
+          soa.Decode<mirror::ObjectArray<mirror::Object>*>(internal);
     // Prepare parameters for StackTraceElement(String cls, String method, String file, int line)
     mirror::ArtMethod* method = down_cast<mirror::ArtMethod*>(method_trace->Get(i));
     mh.ChangeMethod(method);
+    mirror::IntArray* pc_trace = down_cast<mirror::IntArray*>(method_trace->Get(depth));
     uint32_t dex_pc = pc_trace->Get(i);
     int32_t line_number = mh.GetLineNumFromDexPC(dex_pc);
     // Allocate element, potentially triggering GC
@@ -1436,8 +1382,9 @@
       return NULL;
     }
     const char* source_file = mh.GetDeclaringClassSourceFile();
-    SirtRef<mirror::String> source_name_object(soa.Self(), mirror::String::AllocFromModifiedUtf8(soa.Self(),
-                                                                                                 source_file));
+    SirtRef<mirror::String> source_name_object(soa.Self(),
+                                               mirror::String::AllocFromModifiedUtf8(soa.Self(),
+                                                                                     source_file));
     mirror::StackTraceElement* obj = mirror::StackTraceElement::Alloc(soa.Self(),
                                                                       class_name_object.get(),
                                                                       method_name_object.get(),
@@ -1446,13 +1393,7 @@
     if (obj == NULL) {
       return NULL;
     }
-#ifdef MOVING_GARBAGE_COLLECTOR
-    // Re-read after potential GC
-    java_traces = Decode<ObjectArray<Object>*>(soa.Env(), result);
-    method_trace = down_cast<ObjectArray<Object>*>(Decode<Object*>(soa.Env(), internal));
-    pc_trace = down_cast<IntArray*>(method_trace->Get(depth));
-#endif
-    java_traces->Set(i, obj);
+    soa.Decode<mirror::ObjectArray<mirror::StackTraceElement>*>(result)->Set(i, obj);
   }
   return result;
 }
@@ -1623,7 +1564,6 @@
   QUICK_ENTRY_POINT_INFO(pCheckAndAllocArray),
   QUICK_ENTRY_POINT_INFO(pCheckAndAllocArrayWithAccessCheck),
   QUICK_ENTRY_POINT_INFO(pInstanceofNonTrivial),
-  QUICK_ENTRY_POINT_INFO(pCanPutArrayElement),
   QUICK_ENTRY_POINT_INFO(pCheckCast),
   QUICK_ENTRY_POINT_INFO(pInitializeStaticStorage),
   QUICK_ENTRY_POINT_INFO(pInitializeTypeAndVerifyAccess),
@@ -1641,6 +1581,9 @@
   QUICK_ENTRY_POINT_INFO(pGet64Static),
   QUICK_ENTRY_POINT_INFO(pGetObjInstance),
   QUICK_ENTRY_POINT_INFO(pGetObjStatic),
+  QUICK_ENTRY_POINT_INFO(pAputObjectWithNullAndBoundCheck),
+  QUICK_ENTRY_POINT_INFO(pAputObjectWithBoundCheck),
+  QUICK_ENTRY_POINT_INFO(pAputObject),
   QUICK_ENTRY_POINT_INFO(pHandleFillArrayData),
   QUICK_ENTRY_POINT_INFO(pJniMethodStart),
   QUICK_ENTRY_POINT_INFO(pJniMethodStartSynchronized),
@@ -1665,7 +1608,7 @@
   QUICK_ENTRY_POINT_INFO(pD2l),
   QUICK_ENTRY_POINT_INFO(pF2l),
   QUICK_ENTRY_POINT_INFO(pLdiv),
-  QUICK_ENTRY_POINT_INFO(pLdivmod),
+  QUICK_ENTRY_POINT_INFO(pLmod),
   QUICK_ENTRY_POINT_INFO(pLmul),
   QUICK_ENTRY_POINT_INFO(pShlLong),
   QUICK_ENTRY_POINT_INFO(pShrLong),
@@ -1709,7 +1652,7 @@
   DO_THREAD_OFFSET(self_);
   DO_THREAD_OFFSET(stack_end_);
   DO_THREAD_OFFSET(suspend_count_);
-  DO_THREAD_OFFSET(thin_lock_id_);
+  DO_THREAD_OFFSET(thin_lock_thread_id_);
   // DO_THREAD_OFFSET(top_of_managed_stack_);
   // DO_THREAD_OFFSET(top_of_managed_stack_pc_);
   DO_THREAD_OFFSET(top_sirt_);
@@ -1992,7 +1935,7 @@
   if (object == NULL) {
     return false;
   }
-  return object->GetThinLockId() == thin_lock_id_;
+  return object->GetLockOwnerThreadId() == thin_lock_thread_id_;
 }
 
 // RootVisitor parameters are: (mirror::Object* obj, size_t vreg, const StackVisitor* visitor).
@@ -2016,8 +1959,11 @@
         // SIRT for JNI or References for interpreter.
         for (size_t reg = 0; reg < num_regs; ++reg) {
           mirror::Object* ref = shadow_frame->GetVRegReference(reg);
-          if (ref != NULL) {
-            visitor_(ref, reg, this);
+          if (ref != nullptr) {
+            mirror::Object* new_ref = visitor_(ref, reg, this);
+            if (new_ref != ref) {
+              shadow_frame->SetVRegReference(reg, new_ref);
+            }
           }
         }
       } else {
@@ -2037,8 +1983,11 @@
         for (size_t reg = 0; reg < num_regs; ++reg) {
           if (TestBitmap(reg, reg_bitmap)) {
             mirror::Object* ref = shadow_frame->GetVRegReference(reg);
-            if (ref != NULL) {
-              visitor_(ref, reg, this);
+            if (ref != nullptr) {
+              mirror::Object* new_ref = visitor_(ref, reg, this);
+              if (new_ref != ref) {
+                shadow_frame->SetVRegReference(reg, new_ref);
+              }
             }
           }
         }
@@ -2069,19 +2018,25 @@
             // Does this register hold a reference?
             if (TestBitmap(reg, reg_bitmap)) {
               uint32_t vmap_offset;
-              mirror::Object* ref;
               if (vmap_table.IsInContext(reg, kReferenceVReg, &vmap_offset)) {
-                uintptr_t val = GetGPR(vmap_table.ComputeRegister(core_spills, vmap_offset,
-                                                                  kReferenceVReg));
-                ref = reinterpret_cast<mirror::Object*>(val);
+                int vmap_reg = vmap_table.ComputeRegister(core_spills, vmap_offset, kReferenceVReg);
+                mirror::Object* ref = reinterpret_cast<mirror::Object*>(GetGPR(vmap_reg));
+                if (ref != nullptr) {
+                  mirror::Object* new_ref = visitor_(ref, reg, this);
+                  if (ref != new_ref) {
+                    SetGPR(vmap_reg, reinterpret_cast<uintptr_t>(new_ref));
+                  }
+                }
               } else {
-                ref = reinterpret_cast<mirror::Object*>(GetVReg(cur_quick_frame, code_item,
-                                                                core_spills, fp_spills, frame_size,
-                                                                reg));
-              }
-
-              if (ref != NULL) {
-                visitor_(ref, reg, this);
+                uint32_t* reg_addr =
+                    GetVRegAddr(cur_quick_frame, code_item, core_spills, fp_spills, frame_size, reg);
+                mirror::Object* ref = reinterpret_cast<mirror::Object*>(*reg_addr);
+                if (ref != nullptr) {
+                  mirror::Object* new_ref = visitor_(ref, reg, this);
+                  if (ref != new_ref) {
+                    *reg_addr = reinterpret_cast<uint32_t>(new_ref);
+                  }
+                }
               }
             }
           }
@@ -2107,8 +2062,8 @@
  public:
   RootCallbackVisitor(RootVisitor* visitor, void* arg) : visitor_(visitor), arg_(arg) {}
 
-  void operator()(const mirror::Object* obj, size_t, const StackVisitor*) const {
-    visitor_(obj, arg_);
+  mirror::Object* operator()(mirror::Object* obj, size_t, const StackVisitor*) const {
+    return visitor_(obj, arg_);
   }
 
  private:
@@ -2132,67 +2087,17 @@
   void* const arg_;
 };
 
-struct VerifyRootWrapperArg {
-  VerifyRootVisitor* visitor;
-  void* arg;
-};
-
-static void VerifyRootWrapperCallback(const mirror::Object* root, void* arg) {
-  VerifyRootWrapperArg* wrapperArg = reinterpret_cast<VerifyRootWrapperArg*>(arg);
-  wrapperArg->visitor(root, wrapperArg->arg, 0, NULL);
-}
-
-void Thread::VerifyRoots(VerifyRootVisitor* visitor, void* arg) {
-  // We need to map from a RootVisitor to VerifyRootVisitor, so pass in nulls for arguments we
-  // don't have.
-  VerifyRootWrapperArg wrapperArg;
-  wrapperArg.arg = arg;
-  wrapperArg.visitor = visitor;
-
-  if (opeer_ != NULL) {
-    VerifyRootWrapperCallback(opeer_, &wrapperArg);
-  }
-  if (exception_ != NULL) {
-    VerifyRootWrapperCallback(exception_, &wrapperArg);
-  }
-  throw_location_.VisitRoots(VerifyRootWrapperCallback, &wrapperArg);
-  if (class_loader_override_ != NULL) {
-    VerifyRootWrapperCallback(class_loader_override_, &wrapperArg);
-  }
-  jni_env_->locals.VisitRoots(VerifyRootWrapperCallback, &wrapperArg);
-  jni_env_->monitors.VisitRoots(VerifyRootWrapperCallback, &wrapperArg);
-
-  SirtVisitRoots(VerifyRootWrapperCallback, &wrapperArg);
-
-  // Visit roots on this thread's stack
-  Context* context = GetLongJumpContext();
-  VerifyCallbackVisitor visitorToCallback(visitor, arg);
-  ReferenceMapVisitor<VerifyCallbackVisitor> mapper(this, context, visitorToCallback);
-  mapper.WalkStack();
-  ReleaseLongJumpContext(context);
-
-  std::deque<instrumentation::InstrumentationStackFrame>* instrumentation_stack = GetInstrumentationStack();
-  typedef std::deque<instrumentation::InstrumentationStackFrame>::const_iterator It;
-  for (It it = instrumentation_stack->begin(), end = instrumentation_stack->end(); it != end; ++it) {
-    mirror::Object* this_object = (*it).this_object_;
-    if (this_object != NULL) {
-      VerifyRootWrapperCallback(this_object, &wrapperArg);
-    }
-    mirror::ArtMethod* method = (*it).method_;
-    VerifyRootWrapperCallback(method, &wrapperArg);
-  }
-}
-
 void Thread::VisitRoots(RootVisitor* visitor, void* arg) {
-  if (opeer_ != NULL) {
-    visitor(opeer_, arg);
+  if (opeer_ != nullptr) {
+    opeer_ = visitor(opeer_, arg);
   }
-  if (exception_ != NULL) {
-    visitor(exception_, arg);
+  if (exception_ != nullptr) {
+    exception_ = reinterpret_cast<mirror::Throwable*>(visitor(exception_, arg));
   }
   throw_location_.VisitRoots(visitor, arg);
-  if (class_loader_override_ != NULL) {
-    visitor(class_loader_override_, arg);
+  if (class_loader_override_ != nullptr) {
+    class_loader_override_ = reinterpret_cast<mirror::ClassLoader*>(
+        visitor(class_loader_override_, arg));
   }
   jni_env_->locals.VisitRoots(visitor, arg);
   jni_env_->monitors.VisitRoots(visitor, arg);
@@ -2206,24 +2111,26 @@
   mapper.WalkStack();
   ReleaseLongJumpContext(context);
 
-  for (const instrumentation::InstrumentationStackFrame& frame : *GetInstrumentationStack()) {
-    mirror::Object* this_object = frame.this_object_;
-    if (this_object != NULL) {
-      visitor(this_object, arg);
+  for (instrumentation::InstrumentationStackFrame& frame : *GetInstrumentationStack()) {
+    if (frame.this_object_ != nullptr) {
+      frame.this_object_ = visitor(frame.this_object_, arg);
+      DCHECK(frame.this_object_ != nullptr);
     }
-    mirror::ArtMethod* method = frame.method_;
-    visitor(method, arg);
+    frame.method_ = reinterpret_cast<mirror::ArtMethod*>(visitor(frame.method_, arg));
+    DCHECK(frame.method_ != nullptr);
   }
 }
 
-static void VerifyObject(const mirror::Object* root, void* arg) {
-  gc::Heap* heap = reinterpret_cast<gc::Heap*>(arg);
-  heap->VerifyObject(root);
+static mirror::Object* VerifyRoot(mirror::Object* root, void* arg) {
+  DCHECK(root != nullptr);
+  DCHECK(arg != nullptr);
+  reinterpret_cast<gc::Heap*>(arg)->VerifyObject(root);
+  return root;
 }
 
 void Thread::VerifyStackImpl() {
   UniquePtr<Context> context(Context::Create());
-  RootCallbackVisitor visitorToCallback(VerifyObject, Runtime::Current()->GetHeap());
+  RootCallbackVisitor visitorToCallback(VerifyRoot, Runtime::Current()->GetHeap());
   ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context.get(), visitorToCallback);
   mapper.WalkStack();
 }
diff --git a/runtime/thread.h b/runtime/thread.h
index 40e3f5f..3aa1373 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -17,8 +17,6 @@
 #ifndef ART_RUNTIME_THREAD_H_
 #define ART_RUNTIME_THREAD_H_
 
-#include <pthread.h>
-
 #include <bitset>
 #include <deque>
 #include <iosfwd>
@@ -104,16 +102,7 @@
   // Reset internal state of child thread after fork.
   void InitAfterFork();
 
-  static Thread* Current() {
-    // We rely on Thread::Current returning NULL for a detached thread, so it's not obvious
-    // that we can replace this with a direct %fs access on x86.
-    if (!is_started_) {
-      return NULL;
-    } else {
-      void* thread = pthread_getspecific(Thread::pthread_key_self_);
-      return reinterpret_cast<Thread*>(thread);
-    }
-  }
+  static Thread* Current();
 
   static Thread* FromManagedThread(const ScopedObjectAccessUnchecked& ts,
                                    mirror::Object* thread_peer)
@@ -165,7 +154,8 @@
   void ModifySuspendCount(Thread* self, int delta, bool for_debugger)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_);
 
-  bool RequestCheckpoint(Closure* function);
+  bool RequestCheckpoint(Closure* function)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_);
 
   // Called when thread detected that the thread_suspend_count_ was non-zero. Gives up share of
   // mutator_lock_ and waits until it is resumed and thread_suspend_count_ is zero.
@@ -186,14 +176,6 @@
       UNLOCK_FUNCTION(Locks::mutator_lock_)
       ALWAYS_INLINE;
 
-  // Wait for a debugger suspension on the thread associated with the given peer. Returns the
-  // thread on success, else NULL. If the thread should be suspended then request_suspension should
-  // be true on entry. If the suspension times out then *timeout is set to true.
-  static Thread* SuspendForDebugger(jobject peer,  bool request_suspension, bool* timed_out)
-      LOCKS_EXCLUDED(Locks::mutator_lock_,
-                     Locks::thread_list_lock_,
-                     Locks::thread_suspend_count_lock_);
-
   // Once called thread suspension will cause an assertion failure.
 #ifndef NDEBUG
   const char* StartAssertNoThreadSuspension(const char* cause) {
@@ -230,7 +212,7 @@
     return daemon_;
   }
 
-  bool HoldsLock(mirror::Object*);
+  bool HoldsLock(mirror::Object*) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
    * Changes the priority of this thread to match that of the java.lang.Thread object.
@@ -248,8 +230,8 @@
    */
   static int GetNativePriority();
 
-  uint32_t GetThinLockId() const {
-    return thin_lock_id_;
+  uint32_t GetThreadId() const {
+    return thin_lock_thread_id_;
   }
 
   pid_t GetTid() const {
@@ -406,9 +388,6 @@
 
   void VisitRoots(RootVisitor* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VerifyRoots(VerifyRootVisitor* visitor, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   void VerifyStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   //
@@ -428,7 +407,7 @@
   }
 
   static ThreadOffset ThinLockIdOffset() {
-    return ThreadOffset(OFFSETOF_MEMBER(Thread, thin_lock_id_));
+    return ThreadOffset(OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
   }
 
   static ThreadOffset CardTableOffset() {
@@ -590,6 +569,8 @@
 
   void AtomicClearFlag(ThreadFlag flag);
 
+  void ResetQuickAllocEntryPointsForThread();
+
  private:
   // We have no control over the size of 'bool', but want our boolean fields
   // to be 4-byte quantities.
@@ -714,18 +695,18 @@
   // Size of the stack
   size_t stack_size_;
 
-  // Pointer to previous stack trace captured by sampling profiler.
-  std::vector<mirror::ArtMethod*>* stack_trace_sample_;
-
-  // The clock base used for tracing.
-  uint64_t trace_clock_base_;
-
   // Thin lock thread id. This is a small integer used by the thin lock implementation.
   // This is not to be confused with the native thread's tid, nor is it the value returned
   // by java.lang.Thread.getId --- this is a distinct value, used only for locking. One
   // important difference between this id and the ids visible to managed code is that these
   // ones get reused (to ensure that they fit in the number of bits available).
-  uint32_t thin_lock_id_;
+  uint32_t thin_lock_thread_id_;
+
+  // Pointer to previous stack trace captured by sampling profiler.
+  std::vector<mirror::ArtMethod*>* stack_trace_sample_;
+
+  // The clock base used for tracing.
+  uint64_t trace_clock_base_;
 
   // System thread id.
   pid_t tid_;
@@ -734,13 +715,16 @@
 
   // Guards the 'interrupted_' and 'wait_monitor_' members.
   mutable Mutex* wait_mutex_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  // Condition variable waited upon during a wait.
   ConditionVariable* wait_cond_ GUARDED_BY(wait_mutex_);
-  // Pointer to the monitor lock we're currently waiting on (or NULL).
+  // Pointer to the monitor lock we're currently waiting on or NULL if not waiting.
   Monitor* wait_monitor_ GUARDED_BY(wait_mutex_);
   // Thread "interrupted" status; stays raised until queried or thrown.
   bool32_t interrupted_ GUARDED_BY(wait_mutex_);
-  // The next thread in the wait set this thread is part of.
+  // The next thread in the wait set this thread is part of, or NULL if not waiting.
   Thread* wait_next_;
+
   // If we're blocked in MonitorEnter, this is the object we're trying to lock.
   mirror::Object* monitor_enter_object_;
 
@@ -797,7 +781,8 @@
   // Cause for last suspension.
   const char* last_no_thread_suspension_cause_;
 
-  // Pending checkpoint functions.
+  // Pending checkpoint function or NULL if there is none pending. Installation is guarded by
+  // Locks::thread_suspend_count_lock_.
   Closure* checkpoint_function_;
 
  public:
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 671924a..ff1ed2a 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -17,14 +17,22 @@
 #include "thread_list.h"
 
 #include <dirent.h>
+#include <ScopedLocalRef.h>
+#include <ScopedUtfChars.h>
 #include <sys/types.h>
 #include <unistd.h>
 
 #include "base/mutex.h"
+#include "base/mutex-inl.h"
 #include "base/timing_logger.h"
 #include "debugger.h"
+#include "jni_internal.h"
+#include "lock_word.h"
+#include "monitor.h"
+#include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "utils.h"
+#include "well_known_classes.h"
 
 namespace art {
 
@@ -32,6 +40,7 @@
     : allocated_ids_lock_("allocated thread ids lock"),
       suspend_all_count_(0), debug_suspend_all_count_(0),
       thread_exit_cond_("thread exit condition variable", *Locks::thread_list_lock_) {
+  CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1)));
 }
 
 ThreadList::~ThreadList() {
@@ -159,18 +168,19 @@
     // Call a checkpoint function for each thread; threads which are suspended get their
     // checkpoint run manually.
     MutexLock mu(self, *Locks::thread_list_lock_);
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (const auto& thread : list_) {
       if (thread != self) {
-        for (;;) {
+        while (true) {
           if (thread->RequestCheckpoint(checkpoint_function)) {
             // This thread will run its checkpoint some time in the near future.
             count++;
             break;
           } else {
             // We are probably suspended, try to make sure that we stay suspended.
-            MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
             // The thread switched back to runnable.
             if (thread->GetState() == kRunnable) {
+              // Spurious fail, try again.
               continue;
             }
             thread->ModifySuspendCount(self, +1, false);
@@ -203,7 +213,7 @@
       }
     }
     // We know for sure that the thread is suspended at this point.
-    thread->RunCheckpointFunction();
+    checkpoint_function->Run(thread);
     {
       MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
       thread->ModifySuspendCount(self, -1, false);
@@ -321,6 +331,178 @@
   VLOG(threads) << "Resume(" << *thread << ") complete";
 }
 
+static void ThreadSuspendByPeerWarning(Thread* self, int level, const char* message, jobject peer) {
+  JNIEnvExt* env = self->GetJniEnv();
+  ScopedLocalRef<jstring>
+      scoped_name_string(env, (jstring)env->GetObjectField(peer,
+                                                          WellKnownClasses::java_lang_Thread_name));
+  ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
+  if (scoped_name_chars.c_str() == NULL) {
+    LOG(level) << message << ": " << peer;
+    env->ExceptionClear();
+  } else {
+    LOG(level) << message << ": " << peer << ":" << scoped_name_chars.c_str();
+  }
+}
+
+// Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an
+// individual thread requires polling. delay_us is the requested sleep and total_delay_us
+// accumulates the total time spent sleeping for timeouts. The first sleep is just a yield,
+// subsequent sleeps increase delay_us from 1ms to 500ms by doubling.
+static void ThreadSuspendSleep(Thread* self, useconds_t* delay_us, useconds_t* total_delay_us) {
+  for (int i = kLockLevelCount - 1; i >= 0; --i) {
+    BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
+    if (held_mutex != NULL) {
+      LOG(FATAL) << "Holding " << held_mutex->GetName() << " while sleeping for thread suspension";
+    }
+  }
+  {
+    useconds_t new_delay_us = (*delay_us) * 2;
+    CHECK_GE(new_delay_us, *delay_us);
+    if (new_delay_us < 500000) {  // Don't allow sleeping to be more than 0.5s.
+      *delay_us = new_delay_us;
+    }
+  }
+  if ((*delay_us) == 0) {
+    sched_yield();
+    // Default to 1 millisecond (note that this gets multiplied by 2 before the first sleep).
+    (*delay_us) = 500;
+  } else {
+    usleep(*delay_us);
+    (*total_delay_us) += (*delay_us);
+  }
+}
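A worked progression of the backoff above (the doubling happens before each sleep):

    // call 1: delay_us == 0, doubling leaves it 0 -> sched_yield(), delay_us = 500
    // call 2: 500 doubles to 1000 -> usleep(1 ms)
    // call 3: usleep(2 ms); call 4: usleep(4 ms); ...
    // doubling is only accepted while 2 * delay_us < 500000, so individual
    // sleeps cap at 256 ms; only actual sleeps (not the yield) accrue to
    // total_delay_us.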
+
+Thread* ThreadList::SuspendThreadByPeer(jobject peer, bool request_suspension,
+                                        bool debug_suspension, bool* timed_out) {
+  static const useconds_t kTimeoutUs = 30 * 1000000;  // 30s.
+  useconds_t total_delay_us = 0;
+  useconds_t delay_us = 0;
+  bool did_suspend_request = false;
+  *timed_out = false;
+  Thread* self = Thread::Current();
+  while (true) {
+    Thread* thread;
+    {
+      ScopedObjectAccess soa(self);
+      MutexLock mu(self, *Locks::thread_list_lock_);
+      thread = Thread::FromManagedThread(soa, peer);
+      if (thread == NULL) {
+        ThreadSuspendByPeerWarning(self, WARNING, "No such thread for suspend", peer);
+        return NULL;
+      }
+      {
+        MutexLock mu(self, *Locks::thread_suspend_count_lock_);
+        if (request_suspension) {
+          thread->ModifySuspendCount(self, +1, debug_suspension);
+          request_suspension = false;
+          did_suspend_request = true;
+        } else {
+          // If the caller isn't requesting suspension, a suspension should have already occurred.
+          CHECK_GT(thread->GetSuspendCount(), 0);
+        }
+        // IsSuspended on the current thread will fail as the current thread is changed into
+        // Runnable above. As the suspend count is now raised if this is the current thread
+        // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
+        // to just explicitly handle the current thread in the callers to this code.
+        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
+        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
+        // count, or else we've waited and it has self suspended) or is the current thread, we're
+        // done.
+        if (thread->IsSuspended()) {
+          return thread;
+        }
+        if (total_delay_us >= kTimeoutUs) {
+          ThreadSuspendByPeerWarning(self, ERROR, "Thread suspension timed out", peer);
+          if (did_suspend_request) {
+            thread->ModifySuspendCount(soa.Self(), -1, debug_suspension);
+          }
+          *timed_out = true;
+          return NULL;
+        }
+      }
+      // Release locks and come out of runnable state.
+    }
+    ThreadSuspendSleep(self, &delay_us, &total_delay_us);
+  }
+}
+
+static void ThreadSuspendByThreadIdWarning(int level, const char* message, uint32_t thread_id) {
+  LOG(level) << StringPrintf("%s: %d", message, thread_id);
+}
+
+Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension,
+                                            bool* timed_out) {
+  static const useconds_t kTimeoutUs = 30 * 1000000;  // 30s.
+  useconds_t total_delay_us = 0;
+  useconds_t delay_us = 0;
+  bool did_suspend_request = false;
+  *timed_out = false;
+  Thread* self = Thread::Current();
+  CHECK_NE(thread_id, kInvalidThreadId);
+  while (true) {
+    Thread* thread = NULL;
+    {
+      ScopedObjectAccess soa(self);
+      MutexLock mu(self, *Locks::thread_list_lock_);
+      for (const auto& it : list_) {
+        if (it->GetThreadId() == thread_id) {
+          thread = it;
+          break;
+        }
+      }
+      if (thread == NULL) {
+        // There's a race in inflating a lock and the owner giving up ownership and then dying.
+        ThreadSuspendByThreadIdWarning(WARNING, "No such thread id for suspend", thread_id);
+        return NULL;
+      }
+      {
+        MutexLock mu(self, *Locks::thread_suspend_count_lock_);
+        if (!did_suspend_request) {
+          thread->ModifySuspendCount(self, +1, debug_suspension);
+          did_suspend_request = true;
+        } else {
+          // A suspend request was already made on an earlier iteration of this loop.
+          CHECK_GT(thread->GetSuspendCount(), 0);
+        }
+        // IsSuspended on the current thread will fail as the current thread is changed into
+        // Runnable above. As the suspend count is now raised, if this is the current thread
+        // it will self-suspend on the transition to Runnable, making it hard to work with.
+        // It's simpler to just explicitly handle the current thread in the callers of this code.
+        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
+        // If the thread is suspended (perhaps it was already not Runnable but hadn't yet raised
+        // its suspend count, or else we've waited and it has self-suspended) or is the current
+        // thread, we're done.
+        if (thread->IsSuspended()) {
+          return thread;
+        }
+        if (total_delay_us >= kTimeoutUs) {
+          ThreadSuspendByThreadIdWarning(ERROR, "Thread suspension timed out", thread_id);
+          if (did_suspend_request) {
+            thread->ModifySuspendCount(soa.Self(), -1, debug_suspension);
+          }
+          *timed_out = true;
+          return NULL;
+        }
+      }
+      // Release locks and come out of runnable state.
+    }
+    ThreadSuspendSleep(self, &delay_us, &total_delay_us);
+  }
+}
+
+Thread* ThreadList::FindThreadByThreadId(uint32_t thin_lock_id) {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, *Locks::thread_list_lock_);
+  for (const auto& thread : list_) {
+    if (thread->GetThreadId() == thin_lock_id) {
+      CHECK(thread == self || thread->IsSuspended());
+      return thread;
+    }
+  }
+  return NULL;
+}
+
 void ThreadList::SuspendAllForDebugger() {
   Thread* self = Thread::Current();
   Thread* debug_thread = Dbg::GetDebugThread();
@@ -527,8 +709,8 @@
   // suspend and so on, must happen at this point, and not in ~Thread.
   self->Destroy();
 
-  uint32_t thin_lock_id = self->thin_lock_id_;
-  self->thin_lock_id_ = 0;
+  uint32_t thin_lock_id = self->thin_lock_thread_id_;
+  self->thin_lock_thread_id_ = 0;
   ReleaseThreadId(self, thin_lock_id);
   while (self != NULL) {
     // Remove and delete the Thread* while holding the thread_list_lock_ and
@@ -568,10 +750,24 @@
   }
 }
 
+struct VerifyRootWrapperArg {
+  VerifyRootVisitor* visitor;
+  void* arg;
+};
+
+static mirror::Object* VerifyRootWrapperCallback(mirror::Object* root, void* arg) {
+  VerifyRootWrapperArg* wrapperArg = reinterpret_cast<VerifyRootWrapperArg*>(arg);
+  wrapperArg->visitor(root, wrapperArg->arg, 0, NULL);
+  return root;
+}
+
 void ThreadList::VerifyRoots(VerifyRootVisitor* visitor, void* arg) const {
+  VerifyRootWrapperArg wrapper;
+  wrapper.visitor = visitor;
+  wrapper.arg = arg;
   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
   for (const auto& thread : list_) {
-    thread->VerifyRoots(visitor, arg);
+    thread->VisitRoots(VerifyRootWrapperCallback, &wrapper);
   }
 }
 
@@ -594,14 +790,4 @@
   allocated_ids_.reset(id);
 }
 
-Thread* ThreadList::FindThreadByThinLockId(uint32_t thin_lock_id) {
-  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
-  for (const auto& thread : list_) {
-    if (thread->GetThinLockId() == thin_lock_id) {
-      return thread;
-    }
-  }
-  return NULL;
-}
-
 }  // namespace art
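
The suspend backoff above is easy to study in isolation. Below is a minimal standalone sketch (ours, not ART code: no Thread, no held-mutex checking) of the same schedule: the first call yields and arms a 500us delay, each later call doubles the delay before sleeping, the delay is capped at 0.5s, and total_delay_us accumulates toward the caller's 30s budget.

    #include <sched.h>
    #include <unistd.h>
    #include <cstdio>

    // Mirrors ThreadSuspendSleep's timing (names are ours). The doubling happens
    // before the yield check, so the stored 500us becomes a 1ms first real sleep
    // on the next call.
    static void BackoffSleep(useconds_t* delay_us, useconds_t* total_delay_us) {
      useconds_t new_delay_us = (*delay_us) * 2;
      if (new_delay_us < 500000) {  // Cap sleeping at 0.5s.
        *delay_us = new_delay_us;
      }
      if ((*delay_us) == 0) {
        sched_yield();
        (*delay_us) = 500;
      } else {
        usleep(*delay_us);
        (*total_delay_us) += (*delay_us);
      }
    }

    int main() {
      useconds_t delay_us = 0;
      useconds_t total_delay_us = 0;
      // Print the first few steps of the schedule: yield, 1ms, 2ms, 4ms, ...
      for (int i = 0; i < 8; ++i) {
        BackoffSleep(&delay_us, &total_delay_us);
        printf("step %d: delay %uus, total %uus\n", i, delay_us, total_delay_us);
      }
      return 0;
    }
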
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 3df3e2c..b1b3e88 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_THREAD_LIST_H_
 
 #include "base/mutex.h"
+#include "jni.h"
 #include "root_visitor.h"
 
 #include <bitset>
@@ -31,8 +32,8 @@
 class ThreadList {
  public:
   static const uint32_t kMaxThreadId = 0xFFFF;
-  static const uint32_t kInvalidId = 0;
-  static const uint32_t kMainId = 1;
+  static const uint32_t kInvalidThreadId = 0;
+  static const uint32_t kMainThreadId = 1;
 
   explicit ThreadList();
   ~ThreadList();
@@ -59,6 +60,30 @@
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
 
+
+  // Suspend a thread using a peer, typically used by the debugger. Returns the thread on success,
+  // else NULL. The peer is used to identify the thread to avoid races with the thread terminating.
+  // If the thread should be suspended then request_suspension should be true, otherwise the
+  // routine will wait for a previous suspend request. If the suspension times out then *timed_out
+  // is set to true.
+  static Thread* SuspendThreadByPeer(jobject peer, bool request_suspension, bool debug_suspension,
+                                     bool* timed_out)
+      LOCKS_EXCLUDED(Locks::mutator_lock_,
+                     Locks::thread_list_lock_,
+                     Locks::thread_suspend_count_lock_);
+
+  // Suspend a thread using its thread id, typically used by lock/monitor inflation. Returns the
+  // thread on success, else NULL. The thread id is used to identify the thread and avoid races
+  // with it terminating. Note that as thread ids are recycled, this may not suspend the expected
+  // thread, which may be terminating. If the suspension times out then *timed_out is set to true.
+  Thread* SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension, bool* timed_out)
+      LOCKS_EXCLUDED(Locks::mutator_lock_,
+                     Locks::thread_list_lock_,
+                     Locks::thread_suspend_count_lock_);
+
+  // Find an already suspended thread (or self) by its id.
+  Thread* FindThreadByThreadId(uint32_t thin_lock_id);
+
   // Run a checkpoint on threads, running threads are not suspended but run the checkpoint inside
   // of the suspend check. Returns how many checkpoints we should expect to run.
   size_t RunCheckpoint(Closure* checkpoint_function);
@@ -99,8 +124,6 @@
     return list_;
   }
 
-  Thread* FindThreadByThinLockId(uint32_t thin_lock_id);
-
  private:
   uint32_t AllocThreadId(Thread* self);
   void ReleaseThreadId(Thread* self, uint32_t id) LOCKS_EXCLUDED(allocated_ids_lock_);
diff --git a/runtime/throw_location.cc b/runtime/throw_location.cc
index e428511..01497ef 100644
--- a/runtime/throw_location.cc
+++ b/runtime/throw_location.cc
@@ -34,11 +34,14 @@
 }
 
 void ThrowLocation::VisitRoots(RootVisitor* visitor, void* arg) {
-  if (this_object_ != NULL) {
-    visitor(this_object_, arg);
+  if (this_object_ != nullptr) {
+    this_object_ = const_cast<mirror::Object*>(visitor(this_object_, arg));
+    DCHECK(this_object_ != nullptr);
   }
-  if (method_ != NULL) {
-    visitor(method_, arg);
+  if (method_ != nullptr) {
+    method_ = const_cast<mirror::ArtMethod*>(
+        reinterpret_cast<const mirror::ArtMethod*>(visitor(method_, arg)));
+    DCHECK(method_ != nullptr);
   }
 }
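
The ThrowLocation hunk above adopts the newer root-visitor contract: the callback returns the object's address, which a moving collector may have changed, and the holder writes the result back into its field. A standalone toy model of that contract (ours; Object, Holder and MovingVisitor stand in for the mirror types and the GC callback):

    #include <cassert>
    #include <cstddef>

    struct Object { int payload; };  // Stand-in for mirror::Object.

    typedef Object* (*RootCallback)(Object* root, void* arg);

    struct Holder {
      Object* field;
      void VisitRoots(RootCallback visitor, void* arg) {
        if (field != nullptr) {
          field = visitor(field, arg);  // Store back the possibly-moved pointer.
          assert(field != nullptr);
        }
      }
    };

    // A "collector" that relocates every object it visits.
    static Object* MovingVisitor(Object* root, void* arg) {
      Object* new_location = static_cast<Object*>(arg);
      *new_location = *root;  // Copy the object to its new home.
      return new_location;
    }

    int main() {
      Object from{42};
      Object to{0};
      Holder holder{&from};
      holder.VisitRoots(MovingVisitor, &to);
      assert(holder.field == &to && holder.field->payload == 42);
      return 0;
    }
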
 
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 7b25306..ec95a87 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -667,7 +667,7 @@
     mh.ChangeMethod(method);
     os << StringPrintf("%p\t%s\t%s\t%s\t%s\n", method,
         PrettyDescriptor(mh.GetDeclaringClassDescriptor()).c_str(), mh.GetName(),
-        mh.GetSignature().c_str(), mh.GetDeclaringClassSourceFile());
+        mh.GetSignature().ToString().c_str(), mh.GetDeclaringClassSourceFile());
   }
 }
 
diff --git a/runtime/utf-inl.h b/runtime/utf-inl.h
new file mode 100644
index 0000000..d8c258b
--- /dev/null
+++ b/runtime/utf-inl.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_UTF_INL_H_
+#define ART_RUNTIME_UTF_INL_H_
+
+#include "utf.h"
+
+namespace art {
+
+inline uint16_t GetUtf16FromUtf8(const char** utf8_data_in) {
+  uint8_t one = *(*utf8_data_in)++;
+  if ((one & 0x80) == 0) {
+    // one-byte encoding
+    return one;
+  }
+  // two- or three-byte encoding
+  uint8_t two = *(*utf8_data_in)++;
+  if ((one & 0x20) == 0) {
+    // two-byte encoding
+    return ((one & 0x1f) << 6) | (two & 0x3f);
+  }
+  // three-byte encoding
+  uint8_t three = *(*utf8_data_in)++;
+  return ((one & 0x0f) << 12) | ((two & 0x3f) << 6) | (three & 0x3f);
+}
+
+inline int CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(const char* utf8_1,
+                                                                   const char* utf8_2) {
+  for (;;) {
+    if (*utf8_1 == '\0') {
+      return (*utf8_2 == '\0') ? 0 : -1;
+    } else if (*utf8_2 == '\0') {
+      return 1;
+    }
+
+    int c1 = GetUtf16FromUtf8(&utf8_1);
+    int c2 = GetUtf16FromUtf8(&utf8_2);
+
+    if (c1 != c2) {
+      return c1 > c2 ? 1 : -1;
+    }
+  }
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_UTF_INL_H_
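
Moving the decoder into utf-inl.h keeps it inlinable at hot call sites such as string comparison and interning. As a quick sanity check, here is a standalone copy of the decode logic (the wrapper name DecodeOne is ours) run over two known code points:

    #include <cassert>
    #include <cstdint>

    // Standalone copy of GetUtf16FromUtf8's logic, for demonstration only.
    static uint16_t DecodeOne(const char** in) {
      uint8_t one = *(*in)++;
      if ((one & 0x80) == 0) {
        return one;                                 // One-byte (ASCII).
      }
      uint8_t two = *(*in)++;
      if ((one & 0x20) == 0) {
        return ((one & 0x1f) << 6) | (two & 0x3f);  // Two-byte.
      }
      uint8_t three = *(*in)++;
      // Three-byte.
      return ((one & 0x0f) << 12) | ((two & 0x3f) << 6) | (three & 0x3f);
    }

    int main() {
      // U+00E9 is 0xC3 0xA9 in (Modified) UTF-8; U+20AC is 0xE2 0x82 0xAC.
      const char* s = "\xC3\xA9\xE2\x82\xAC";
      assert(DecodeOne(&s) == 0x00E9);
      assert(DecodeOne(&s) == 0x20AC);
      assert(*s == '\0');
      return 0;
    }
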
diff --git a/runtime/utf.cc b/runtime/utf.cc
index 1add7d9..5ec2ea1 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -19,6 +19,7 @@
 #include "base/logging.h"
 #include "mirror/array.h"
 #include "mirror/object-inl.h"
+#include "utf-inl.h"
 
 namespace art {
 
@@ -84,41 +85,6 @@
   return hash;
 }
 
-
-uint16_t GetUtf16FromUtf8(const char** utf8_data_in) {
-  uint8_t one = *(*utf8_data_in)++;
-  if ((one & 0x80) == 0) {
-    // one-byte encoding
-    return one;
-  }
-  // two- or three-byte encoding
-  uint8_t two = *(*utf8_data_in)++;
-  if ((one & 0x20) == 0) {
-    // two-byte encoding
-    return ((one & 0x1f) << 6) | (two & 0x3f);
-  }
-  // three-byte encoding
-  uint8_t three = *(*utf8_data_in)++;
-  return ((one & 0x0f) << 12) | ((two & 0x3f) << 6) | (three & 0x3f);
-}
-
-int CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(const char* utf8_1, const char* utf8_2) {
-  for (;;) {
-    if (*utf8_1 == '\0') {
-      return (*utf8_2 == '\0') ? 0 : -1;
-    } else if (*utf8_2 == '\0') {
-      return 1;
-    }
-
-    int c1 = GetUtf16FromUtf8(&utf8_1);
-    int c2 = GetUtf16FromUtf8(&utf8_2);
-
-    if (c1 != c2) {
-      return c1 > c2 ? 1 : -1;
-    }
-  }
-}
-
 int CompareModifiedUtf8ToUtf16AsCodePointValues(const char* utf8_1, const uint16_t* utf8_2) {
   for (;;) {
     if (*utf8_1 == '\0') {
diff --git a/runtime/utf.h b/runtime/utf.h
index 4c9a1d9..cc5e6d4 100644
--- a/runtime/utf.h
+++ b/runtime/utf.h
@@ -29,9 +29,10 @@
  * See http://en.wikipedia.org/wiki/UTF-8#Modified_UTF-8 for the details.
  */
 namespace art {
+
 namespace mirror {
-template<class T> class PrimitiveArray;
-typedef PrimitiveArray<uint16_t> CharArray;
+  template<class T> class PrimitiveArray;
+  typedef PrimitiveArray<uint16_t> CharArray;
 }  // namespace mirror
 
 /*
diff --git a/runtime/utils.cc b/runtime/utils.cc
index ac5cae2..1386f869 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -34,7 +34,7 @@
 #include "mirror/string.h"
 #include "object_utils.h"
 #include "os.h"
-#include "utf.h"
+#include "utf-inl.h"
 
 #if !defined(HAVE_POSIX_CLOCKS)
 #include <sys/time.h>
@@ -367,11 +367,13 @@
   result += '.';
   result += mh.GetName();
   if (with_signature) {
-    std::string signature(mh.GetSignature());
-    if (signature == "<no signature>") {
-      return result + signature;
+    const Signature signature = mh.GetSignature();
+    std::string sig_as_string(signature.ToString());
+    if (signature == Signature::NoSignature()) {
+      return result + sig_as_string;
     }
-    result = PrettyReturnType(signature.c_str()) + " " + result + PrettyArguments(signature.c_str());
+    result = PrettyReturnType(sig_as_string.c_str()) + " " + result +
+        PrettyArguments(sig_as_string.c_str());
   }
   return result;
 }
@@ -385,11 +387,13 @@
   result += '.';
   result += dex_file.GetMethodName(method_id);
   if (with_signature) {
-    std::string signature(dex_file.GetMethodSignature(method_id));
-    if (signature == "<no signature>") {
-      return result + signature;
+    const Signature signature = dex_file.GetMethodSignature(method_id);
+    std::string sig_as_string(signature.ToString());
+    if (signature == Signature::NoSignature()) {
+      return result + sig_as_string;
     }
-    result = PrettyReturnType(signature.c_str()) + " " + result + PrettyArguments(signature.c_str());
+    result = PrettyReturnType(sig_as_string.c_str()) + " " + result +
+        PrettyArguments(sig_as_string.c_str());
   }
   return result;
 }
@@ -641,7 +645,7 @@
   long_name += JniShortName(m);
   long_name += "__";
 
-  std::string signature(MethodHelper(m).GetSignature());
+  std::string signature(MethodHelper(m).GetSignature().ToString());
   signature.erase(0, 1);
   signature.erase(signature.begin() + signature.find(')'), signature.end());
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 34ebeb1..7d2ee19 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -51,7 +51,8 @@
                                  uint32_t insns_size, uint16_t registers_size,
                                  MethodVerifier* verifier) {
   DCHECK_GT(insns_size, 0U);
-
+  register_lines_.reset(new RegisterLine*[insns_size]());
+  size_ = insns_size;
   for (uint32_t i = 0; i < insns_size; i++) {
     bool interesting = false;
     switch (mode) {
@@ -68,7 +69,16 @@
         break;
     }
     if (interesting) {
-      pc_to_register_line_.Put(i, new RegisterLine(registers_size, verifier));
+      register_lines_[i] = RegisterLine::Create(registers_size, verifier);
+    }
+  }
+}
+
+PcToRegisterLineTable::~PcToRegisterLineTable() {
+  for (size_t i = 0; i < size_; i++) {
+    delete register_lines_[i];
+    if (kIsDebugBuild) {
+      register_lines_[i] = nullptr;
     }
   }
 }
@@ -80,7 +90,7 @@
     return kNoFailure;
   }
   mirror::Class* super = klass->GetSuperClass();
-  if (super == NULL && StringPiece(ClassHelper(klass).GetDescriptor()) != "Ljava/lang/Object;") {
+  if (super == NULL && ClassHelper(klass).GetDescriptorAsStringPiece() != "Ljava/lang/Object;") {
     *error = "Verifier rejected class ";
     *error += PrettyDescriptor(klass);
     *error += " that has no super class";
@@ -293,6 +303,7 @@
       dex_method_idx_(dex_method_idx),
       mirror_method_(method),
       method_access_flags_(method_access_flags),
+      return_type_(nullptr),
       dex_file_(dex_file),
       dex_cache_(dex_cache),
       class_loader_(class_loader),
@@ -300,7 +311,7 @@
       code_item_(code_item),
       declaring_class_(NULL),
       interesting_dex_pc_(-1),
-      monitor_enter_dex_pcs_(NULL),
+      monitor_enter_dex_pcs_(nullptr),
       have_pending_hard_failure_(false),
       have_pending_runtime_throw_failure_(false),
       new_instance_count_(0),
@@ -309,7 +320,7 @@
       allow_soft_failures_(allow_soft_failures),
       has_check_casts_(false),
       has_virtual_or_interface_invokes_(false) {
-  DCHECK(class_def != NULL);
+  DCHECK(class_def != nullptr);
 }
 
 void MethodVerifier::FindLocksAtDexPc(mirror::ArtMethod* m, uint32_t dex_pc,
@@ -495,17 +506,25 @@
 
   while (dex_pc < insns_size) {
     Instruction::Code opcode = inst->Opcode();
-    if (opcode == Instruction::NEW_INSTANCE) {
-      new_instance_count++;
-    } else if (opcode == Instruction::MONITOR_ENTER) {
-      monitor_enter_count++;
-    } else if (opcode == Instruction::CHECK_CAST) {
-      has_check_casts_ = true;
-    } else if ((inst->Opcode() == Instruction::INVOKE_VIRTUAL) ||
-              (inst->Opcode() ==  Instruction::INVOKE_VIRTUAL_RANGE) ||
-              (inst->Opcode() == Instruction::INVOKE_INTERFACE) ||
-              (inst->Opcode() == Instruction::INVOKE_INTERFACE_RANGE)) {
-      has_virtual_or_interface_invokes_ = true;
+    switch (opcode) {
+      case Instruction::APUT_OBJECT:
+      case Instruction::CHECK_CAST:
+        has_check_casts_ = true;
+        break;
+      case Instruction::INVOKE_VIRTUAL:
+      case Instruction::INVOKE_VIRTUAL_RANGE:
+      case Instruction::INVOKE_INTERFACE:
+      case Instruction::INVOKE_INTERFACE_RANGE:
+        has_virtual_or_interface_invokes_ = true;
+        break;
+      case Instruction::MONITOR_ENTER:
+        monitor_enter_count++;
+        break;
+      case Instruction::NEW_INSTANCE:
+        new_instance_count++;
+        break;
+      default:
+        break;
     }
     size_t inst_size = inst->SizeInCodeUnits();
     insn_flags_[dex_pc].SetLengthInCodeUnits(inst_size);
@@ -1034,8 +1053,8 @@
                   this);
 
 
-  work_line_.reset(new RegisterLine(registers_size, this));
-  saved_line_.reset(new RegisterLine(registers_size, this));
+  work_line_.reset(RegisterLine::Create(registers_size, this));
+  saved_line_.reset(RegisterLine::Create(registers_size, this));
 
   /* Initialize register types of method arguments. */
   if (!SetTypesFromSignature()) {
@@ -1935,7 +1954,7 @@
 
         if (!cast_type.IsUnresolvedTypes() && !orig_type.IsUnresolvedTypes() &&
             !cast_type.GetClass()->IsInterface() && !cast_type.IsAssignableFrom(orig_type)) {
-          RegisterLine* update_line = new RegisterLine(code_item_->registers_size_, this);
+          RegisterLine* update_line = RegisterLine::Create(code_item_->registers_size_, this);
           if (inst->Opcode() == Instruction::IF_EQZ) {
             fallthrough_line.reset(update_line);
           } else {
@@ -2131,20 +2150,30 @@
                         inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
       mirror::ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_VIRTUAL,
                                                                    is_range, is_super);
-      const char* descriptor;
-      if (called_method == NULL) {
+      const RegType* return_type = nullptr;
+      if (called_method != nullptr) {
+        MethodHelper mh(called_method);
+        mirror::Class* return_type_class = mh.GetReturnType();
+        if (return_type_class != nullptr) {
+          return_type = &reg_types_.FromClass(mh.GetReturnTypeDescriptor(), return_type_class,
+                                              return_type_class->CannotBeAssignedFromOtherTypes());
+        } else {
+          Thread* self = Thread::Current();
+          DCHECK(self->IsExceptionPending());
+          self->ClearException();
+        }
+      }
+      if (return_type == nullptr) {
         uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
         const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
         uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
-        descriptor =  dex_file_->StringByTypeIdx(return_type_idx);
-      } else {
-        descriptor = MethodHelper(called_method).GetReturnTypeDescriptor();
+        const char* descriptor = dex_file_->StringByTypeIdx(return_type_idx);
+        return_type = &reg_types_.FromDescriptor(class_loader_, descriptor, false);
       }
-      const RegType& return_type = reg_types_.FromDescriptor(class_loader_, descriptor, false);
-      if (!return_type.IsLowHalf()) {
-        work_line_->SetResultRegisterType(return_type);
+      if (!return_type->IsLowHalf()) {
+        work_line_->SetResultRegisterType(*return_type);
       } else {
-        work_line_->SetResultRegisterTypeWide(return_type, return_type.HighHalf(&reg_types_));
+        work_line_->SetResultRegisterTypeWide(*return_type, return_type->HighHalf(&reg_types_));
       }
       just_set_result = true;
       break;
@@ -2159,7 +2188,7 @@
       if (called_method == NULL) {
         uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
         const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
-        is_constructor = StringPiece(dex_file_->GetMethodName(method_id)) == "<init>";
+        is_constructor = dex_file_->StringDataAsStringPieceByIdx(method_id.name_idx_) == "<init>";
         uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
         return_type_descriptor =  dex_file_->StringByTypeIdx(return_type_idx);
       } else {
@@ -2889,7 +2918,7 @@
 }
 
 mirror::ArtMethod* MethodVerifier::ResolveMethodAndCheckAccess(uint32_t dex_method_idx,
-                                                                    MethodType method_type) {
+                                                               MethodType method_type) {
   const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx);
   const RegType& klass_type = ResolveClassAndCheckAccess(method_id.class_idx_);
   if (klass_type.IsConflict()) {
@@ -2906,7 +2935,7 @@
   mirror::ArtMethod* res_method = dex_cache_->GetResolvedMethod(dex_method_idx);
   if (res_method == NULL) {
     const char* name = dex_file_->GetMethodName(method_id);
-    std::string signature(dex_file_->CreateMethodSignature(method_id.proto_idx_, NULL));
+    const Signature signature = dex_file_->GetMethodSignature(method_id);
 
     if (method_type == METHOD_DIRECT || method_type == METHOD_STATIC) {
       res_method = klass->FindDirectMethod(name, signature);
@@ -3504,22 +3533,26 @@
     const RegType& object_type = work_line_->GetRegisterType(inst->VRegB_22c());
     field = GetInstanceField(object_type, field_idx);
   }
-  const char* descriptor;
-  mirror::ClassLoader* loader;
+  const RegType* field_type = nullptr;
   if (field != NULL) {
-    descriptor = FieldHelper(field).GetTypeDescriptor();
-    loader = field->GetDeclaringClass()->GetClassLoader();
-  } else {
-    const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
-    descriptor = dex_file_->GetFieldTypeDescriptor(field_id);
-    loader = class_loader_;
+    FieldHelper fh(field);
+    mirror::Class* field_type_class = fh.GetType(false);
+    if (field_type_class != nullptr) {
+      field_type = &reg_types_.FromClass(fh.GetTypeDescriptor(), field_type_class,
+                                         field_type_class->CannotBeAssignedFromOtherTypes());
+    }
   }
-  const RegType& field_type = reg_types_.FromDescriptor(loader, descriptor, false);
+  if (field_type == nullptr) {
+    const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
+    const char* descriptor = dex_file_->GetFieldTypeDescriptor(field_id);
+    mirror::ClassLoader* loader = class_loader_;
+    field_type = &reg_types_.FromDescriptor(loader, descriptor, false);
+  }
   const uint32_t vregA = (is_static) ? inst->VRegA_21c() : inst->VRegA_22c();
   if (is_primitive) {
-    if (field_type.Equals(insn_type) ||
-        (field_type.IsFloat() && insn_type.IsInteger()) ||
-        (field_type.IsDouble() && insn_type.IsLong())) {
+    if (field_type->Equals(insn_type) ||
+        (field_type->IsFloat() && insn_type.IsInteger()) ||
+        (field_type->IsDouble() && insn_type.IsLong())) {
       // expected that read is of the correct primitive type or that int reads are reading
       // floats or long reads are reading doubles
     } else {
@@ -3532,7 +3565,7 @@
       return;
     }
   } else {
-    if (!insn_type.IsAssignableFrom(field_type)) {
+    if (!insn_type.IsAssignableFrom(*field_type)) {
       Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "expected field " << PrettyField(field)
                                         << " to be compatible with type '" << insn_type
                                         << "' but found type '" << field_type
@@ -3541,10 +3574,10 @@
       return;
     }
   }
-  if (!field_type.IsLowHalf()) {
-    work_line_->SetRegisterType(vregA, field_type);
+  if (!field_type->IsLowHalf()) {
+    work_line_->SetRegisterType(vregA, *field_type);
   } else {
-    work_line_->SetRegisterTypeWide(vregA, field_type, field_type.HighHalf(&reg_types_));
+    work_line_->SetRegisterTypeWide(vregA, *field_type, field_type->HighHalf(&reg_types_));
   }
 }
 
@@ -3558,36 +3591,38 @@
     const RegType& object_type = work_line_->GetRegisterType(inst->VRegB_22c());
     field = GetInstanceField(object_type, field_idx);
   }
-  const char* descriptor;
-  mirror::ClassLoader* loader;
-  if (field != NULL) {
-    descriptor = FieldHelper(field).GetTypeDescriptor();
-    loader = field->GetDeclaringClass()->GetClassLoader();
-  } else {
-    const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
-    descriptor = dex_file_->GetFieldTypeDescriptor(field_id);
-    loader = class_loader_;
-  }
-  const RegType& field_type = reg_types_.FromDescriptor(loader, descriptor, false);
+  const RegType* field_type = nullptr;
   if (field != NULL) {
     if (field->IsFinal() && field->GetDeclaringClass() != GetDeclaringClass().GetClass()) {
       Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot modify final field " << PrettyField(field)
                                       << " from other class " << GetDeclaringClass();
       return;
     }
+    FieldHelper fh(field);
+    mirror::Class* field_type_class = fh.GetType(false);
+    if (field_type_class != nullptr) {
+      field_type = &reg_types_.FromClass(fh.GetTypeDescriptor(), field_type_class,
+                                         field_type_class->CannotBeAssignedFromOtherTypes());
+    }
+  }
+  if (field_type == nullptr) {
+    const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
+    const char* descriptor = dex_file_->GetFieldTypeDescriptor(field_id);
+    mirror::ClassLoader* loader = class_loader_;
+    field_type = &reg_types_.FromDescriptor(loader, descriptor, false);
   }
   const uint32_t vregA = (is_static) ? inst->VRegA_21c() : inst->VRegA_22c();
   if (is_primitive) {
-    VerifyPrimitivePut(field_type, insn_type, vregA);
+    VerifyPrimitivePut(*field_type, insn_type, vregA);
   } else {
-    if (!insn_type.IsAssignableFrom(field_type)) {
+    if (!insn_type.IsAssignableFrom(*field_type)) {
       Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "expected field " << PrettyField(field)
                                         << " to be compatible with type '" << insn_type
                                         << "' but found type '" << field_type
                                         << "' in put-object";
       return;
     }
-    work_line_->VerifyRegisterType(vregA, field_type);
+    work_line_->VerifyRegisterType(vregA, *field_type);
   }
 }
 
@@ -3655,14 +3690,21 @@
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Cannot infer field from " << inst->Name();
     return;
   }
-  const char* descriptor = FieldHelper(field).GetTypeDescriptor();
-  mirror::ClassLoader* loader = field->GetDeclaringClass()->GetClassLoader();
-  const RegType& field_type = reg_types_.FromDescriptor(loader, descriptor, false);
+  FieldHelper fh(field);
+  mirror::Class* field_type_class = fh.GetType(false);
+  const RegType* field_type;
+  if (field_type_class != nullptr) {
+    field_type = &reg_types_.FromClass(fh.GetTypeDescriptor(), field_type_class,
+                                       field_type_class->CannotBeAssignedFromOtherTypes());
+  } else {
+    field_type = &reg_types_.FromDescriptor(field->GetDeclaringClass()->GetClassLoader(),
+                                            fh.GetTypeDescriptor(), false);
+  }
   const uint32_t vregA = inst->VRegA_22c();
   if (is_primitive) {
-    if (field_type.Equals(insn_type) ||
-        (field_type.IsFloat() && insn_type.IsIntegralTypes()) ||
-        (field_type.IsDouble() && insn_type.IsLongTypes())) {
+    if (field_type->Equals(insn_type) ||
+        (field_type->IsFloat() && insn_type.IsIntegralTypes()) ||
+        (field_type->IsDouble() && insn_type.IsLongTypes())) {
       // expected that read is of the correct primitive type or that int reads are reading
       // floats or long reads are reading doubles
     } else {
@@ -3675,7 +3717,7 @@
       return;
     }
   } else {
-    if (!insn_type.IsAssignableFrom(field_type)) {
+    if (!insn_type.IsAssignableFrom(*field_type)) {
       Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "expected field " << PrettyField(field)
                                         << " to be compatible with type '" << insn_type
                                         << "' but found type '" << field_type
@@ -3684,10 +3726,10 @@
       return;
     }
   }
-  if (!field_type.IsLowHalf()) {
-    work_line_->SetRegisterType(vregA, field_type);
+  if (!field_type->IsLowHalf()) {
+    work_line_->SetRegisterType(vregA, *field_type);
   } else {
-    work_line_->SetRegisterTypeWide(vregA, field_type, field_type.HighHalf(&reg_types_));
+    work_line_->SetRegisterTypeWide(vregA, *field_type, field_type->HighHalf(&reg_types_));
   }
 }
 
@@ -3801,7 +3843,7 @@
     }
   } else {
     UniquePtr<RegisterLine> copy(gDebugVerify ?
-                                 new RegisterLine(target_line->NumRegs(), this) :
+                                 RegisterLine::Create(target_line->NumRegs(), this) :
                                  NULL);
     if (gDebugVerify) {
       copy->CopyFromLine(target_line);
@@ -3829,11 +3871,28 @@
 }
 
 const RegType& MethodVerifier::GetMethodReturnType() {
-  const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx_);
-  const DexFile::ProtoId& proto_id = dex_file_->GetMethodPrototype(method_id);
-  uint16_t return_type_idx = proto_id.return_type_idx_;
-  const char* descriptor = dex_file_->GetTypeDescriptor(dex_file_->GetTypeId(return_type_idx));
-  return reg_types_.FromDescriptor(class_loader_, descriptor, false);
+  if (return_type_ == nullptr) {
+    if (mirror_method_ != NULL) {
+      MethodHelper mh(mirror_method_);
+      mirror::Class* return_type_class = mh.GetReturnType();
+      if (return_type_class != nullptr) {
+        return_type_ = &reg_types_.FromClass(mh.GetReturnTypeDescriptor(), return_type_class,
+                                            return_type_class->CannotBeAssignedFromOtherTypes());
+      } else {
+        Thread* self = Thread::Current();
+        DCHECK(self->IsExceptionPending());
+        self->ClearException();
+      }
+    }
+    if (return_type_ == nullptr) {
+      const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx_);
+      const DexFile::ProtoId& proto_id = dex_file_->GetMethodPrototype(method_id);
+      uint16_t return_type_idx = proto_id.return_type_idx_;
+      const char* descriptor = dex_file_->GetTypeDescriptor(dex_file_->GetTypeId(return_type_idx));
+      return_type_ = &reg_types_.FromDescriptor(class_loader_, descriptor, false);
+    }
+  }
+  return *return_type_;
 }
 
 const RegType& MethodVerifier::GetDeclaringClass() {
@@ -3889,18 +3948,32 @@
                                            code_item_->insns_size_in_code_units_);
 
   for (; inst < end; inst = inst->Next()) {
-    if (Instruction::CHECK_CAST != inst->Opcode()) {
-      continue;
-    }
-    uint32_t dex_pc = inst->GetDexPc(code_item_->insns_);
-    RegisterLine* line = reg_table_.GetLine(dex_pc);
-    const RegType& reg_type(line->GetRegisterType(inst->VRegA_21c()));
-    const RegType& cast_type = ResolveClassAndCheckAccess(inst->VRegB_21c());
-    if (cast_type.IsStrictlyAssignableFrom(reg_type)) {
-      if (mscs.get() == NULL) {
-        mscs.reset(new MethodSafeCastSet());
+    Instruction::Code code = inst->Opcode();
+    if ((code == Instruction::CHECK_CAST) || (code == Instruction::APUT_OBJECT)) {
+      uint32_t dex_pc = inst->GetDexPc(code_item_->insns_);
+      RegisterLine* line = reg_table_.GetLine(dex_pc);
+      bool is_safe_cast = false;
+      if (code == Instruction::CHECK_CAST) {
+        const RegType& reg_type(line->GetRegisterType(inst->VRegA_21c()));
+        const RegType& cast_type = ResolveClassAndCheckAccess(inst->VRegB_21c());
+        is_safe_cast = cast_type.IsStrictlyAssignableFrom(reg_type);
+      } else {
+        const RegType& array_type(line->GetRegisterType(inst->VRegB_23x()));
+        // We only know it's safe to assign to an array if the array type is precise. For example,
+        // an Object[] can have any type of object stored in it, but it may also be assigned a
+        // String[], in which case the stores need to be of Strings.
+        if (array_type.IsPreciseReference()) {
+          const RegType& value_type(line->GetRegisterType(inst->VRegA_23x()));
+          const RegType& component_type(reg_types_.GetComponentType(array_type, class_loader_));
+          is_safe_cast = component_type.IsStrictlyAssignableFrom(value_type);
+        }
       }
-      mscs->insert(dex_pc);
+      if (is_safe_cast) {
+        if (mscs.get() == NULL) {
+          mscs.reset(new MethodSafeCastSet());
+        }
+        mscs->insert(dex_pc);
+      }
     }
   }
   return mscs.release();
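
The loop above extends the safe-cast set to aput-object sites, and the precision requirement is the crux: only when the verifier knows the array's exact runtime type can it prove the store will not throw an ArrayStoreException. A standalone toy model of the rule (ours; ArrayTypeModel and plain string equality stand in for RegType, GetComponentType and IsStrictlyAssignableFrom):

    #include <cassert>
    #include <string>

    struct ArrayTypeModel {
      std::string component_descriptor;  // e.g. "Ljava/lang/String;"
      bool precise;                      // Is the exact runtime type known?
    };

    // A store is provably safe only for a precise array type whose component
    // the value's type is assignable to (modeled here as equality).
    static bool IsSafeAputObject(const ArrayTypeModel& array,
                                 const std::string& value_descriptor) {
      return array.precise && array.component_descriptor == value_descriptor;
    }

    int main() {
      ArrayTypeModel precise_strings{"Ljava/lang/String;", true};
      ArrayTypeModel maybe_strings{"Ljava/lang/Object;", false};
      assert(IsSafeAputObject(precise_strings, "Ljava/lang/String;"));
      // An imprecise Object[] might really be a String[]; never marked safe.
      assert(!IsSafeAputObject(maybe_strings, "Ljava/lang/Object;"));
      return 0;
    }
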
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 073a2f7..57fde1d 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -110,10 +110,8 @@
 // execution of that instruction.
 class PcToRegisterLineTable {
  public:
-  PcToRegisterLineTable() {}
-  ~PcToRegisterLineTable() {
-    STLDeleteValues(&pc_to_register_line_);
-  }
+  PcToRegisterLineTable() : size_(0) {}
+  ~PcToRegisterLineTable();
 
   // Initialize the RegisterTable. Every instruction address can have a different set of information
   // about what's in which register, but for verification purposes we only need to store it at
@@ -122,17 +120,13 @@
             uint16_t registers_size, MethodVerifier* verifier);
 
   RegisterLine* GetLine(size_t idx) {
-    auto result = pc_to_register_line_.find(idx);
-    if (result == pc_to_register_line_.end()) {
-      return NULL;
-    } else {
-      return result->second;
-    }
+    DCHECK_LT(idx, size_);
+    return register_lines_[idx];
   }
 
  private:
-  typedef SafeMap<int32_t, RegisterLine*> Table;
-  Table pc_to_register_line_;
+  UniquePtr<RegisterLine*[]> register_lines_;
+  size_t size_;
 };
 
 // The verifier
@@ -688,6 +682,7 @@
   // Its object representation if known.
   mirror::ArtMethod* mirror_method_ GUARDED_BY(Locks::mutator_lock_);
   const uint32_t method_access_flags_;  // Method's access flags.
+  const RegType* return_type_;  // Lazily computed return type of the method.
   const DexFile* const dex_file_;  // The dex file containing the method.
   // The dex_cache for the declaring class of the method.
   mirror::DexCache* dex_cache_ GUARDED_BY(Locks::mutator_lock_);
@@ -729,10 +724,12 @@
   // running and the verifier is called from the class linker.
   const bool allow_soft_failures_;
 
-  // Indicates if the method being verified contains at least one check-cast instruction.
+  // Indicates whether the method being verified contains at least one check-cast or aput-object
+  // instruction. Aput-object operations implicitly check for array-store exceptions, similar to
+  // check-cast.
   bool has_check_casts_;
 
-  // Indicates if the method being verified contains at least one invoke-virtual/range
+  // Indicates whether the method being verified contains at least one invoke-virtual/range
+  // or invoke-interface/range.
   bool has_virtual_or_interface_invokes_;
 };
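
The PcToRegisterLineTable rewrite above swaps a SafeMap keyed by dex pc for a dense array sized to the method's code units: GetLine becomes a bounds-checked index instead of a map lookup, at the cost of null slots for uninteresting pcs. A minimal standalone sketch of the same ownership and lookup pattern (ours; Line and std::unique_ptr stand in for RegisterLine and ART's UniquePtr):

    #include <cassert>
    #include <cstddef>
    #include <memory>

    struct Line { int data; };  // Stand-in for RegisterLine.

    class PcToLineTable {
     public:
      PcToLineTable() : size_(0) {}
      ~PcToLineTable() {
        for (size_t i = 0; i < size_; i++) {
          delete lines_[i];  // Deleting the null slots is a no-op.
        }
      }
      void Init(size_t insns_size) {
        // The trailing () value-initializes every slot to nullptr.
        lines_.reset(new Line*[insns_size]());
        size_ = insns_size;
      }
      void Put(size_t pc, Line* line) { lines_[pc] = line; }
      Line* GetLine(size_t pc) {
        assert(pc < size_);
        return lines_[pc];  // O(1) index instead of a map lookup.
      }

     private:
      std::unique_ptr<Line*[]> lines_;
      size_t size_;
    };

    int main() {
      PcToLineTable table;
      table.Init(8);
      table.Put(3, new Line{42});
      assert(table.GetLine(3)->data == 42);
      assert(table.GetLine(4) == nullptr);  // Uninteresting pc: empty slot.
      return 0;
    }
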
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 25f840c..50d1583 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -99,7 +99,7 @@
 }
 
 std::string BooleanType::Dump() const {
-  return "boolean";
+  return "Boolean";
 }
 
 std::string ConflictType::Dump() const {
@@ -111,7 +111,7 @@
 }
 
 std::string ShortType::Dump() const {
-  return "short";
+  return "Short";
 }
 
 std::string CharType::Dump() const {
@@ -119,15 +119,15 @@
 }
 
 std::string FloatType::Dump() const {
-  return "float";
+  return "Float";
 }
 
 std::string LongLoType::Dump() const {
-  return "long (Low Half)";
+  return "Long (Low Half)";
 }
 
 std::string LongHiType::Dump() const {
-  return "long (High Half)";
+  return "Long (High Half)";
 }
 
 std::string DoubleLoType::Dump() const {
@@ -461,7 +461,6 @@
   std::stringstream result;
   uint32_t val = ConstantValue();
   if (val == 0) {
-    CHECK(IsPreciseConstant());
     result << "Zero/null";
   } else {
     result << "Imprecise ";
@@ -762,11 +761,6 @@
   return AssignableFrom(*this, src, true);
 }
 
-int32_t ConstantType::ConstantValue() const {
-  DCHECK(IsConstantTypes());
-  return constant_;
-}
-
 int32_t ConstantType::ConstantValueLo() const {
   DCHECK(IsConstantLo());
   return constant_;
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 865ba20..f371733 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -574,9 +574,12 @@
 
   // If this is a 32-bit constant, what is the value? This value may be imprecise in which case
   // the value represents part of the integer range of values that may be held in the register.
-  virtual int32_t ConstantValue() const;
-  virtual int32_t ConstantValueLo() const;
-  virtual int32_t ConstantValueHi() const;
+  int32_t ConstantValue() const {
+    DCHECK(IsConstantTypes());
+    return constant_;
+  }
+  int32_t ConstantValueLo() const;
+  int32_t ConstantValueHi() const;
 
   bool IsZero() const {
     return IsPreciseConstant() && ConstantValue() == 0;
diff --git a/runtime/verifier/reg_type_cache-inl.h b/runtime/verifier/reg_type_cache-inl.h
index 295e271..fc9e5c9 100644
--- a/runtime/verifier/reg_type_cache-inl.h
+++ b/runtime/verifier/reg_type_cache-inl.h
@@ -23,17 +23,6 @@
 
 namespace art {
 namespace verifier {
-template <class Type>
-Type* RegTypeCache::CreatePrimitiveTypeInstance(const std::string& descriptor) {
-  mirror::Class* klass = NULL;
-  // Try loading the class from linker.
-  if (!descriptor.empty()) {
-    klass = art::Runtime::Current()->GetClassLinker()->FindSystemClass(descriptor.c_str());
-  }
-  Type* entry = Type::CreateInstance(klass, descriptor, RegTypeCache::primitive_count_);
-  RegTypeCache::primitive_count_++;
-  return entry;
-}
 
 inline const art::verifier::RegType& RegTypeCache::GetFromId(uint16_t id) const {
   DCHECK_LT(id, entries_.size());
@@ -41,6 +30,16 @@
   DCHECK(result != NULL);
   return *result;
 }
+
+inline const ConstantType& RegTypeCache::FromCat1Const(int32_t value, bool precise) {
+  // We only expect 0 to be a precise constant.
+  DCHECK(value != 0 || precise);
+  if (precise && (value >= kMinSmallConstant) && (value <= kMaxSmallConstant)) {
+    return *small_precise_constants_[value - kMinSmallConstant];
+  }
+  return FromCat1NonSmallConstant(value, precise);
+}
+
 }  // namespace verifier
 }  // namespace art
 #endif  // ART_RUNTIME_VERIFIER_REG_TYPE_CACHE_INL_H_
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index 2c18132..446dd00 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -26,8 +26,8 @@
 namespace verifier {
 
 bool RegTypeCache::primitive_initialized_ = false;
-uint16_t RegTypeCache::primitive_start_ = 0;
 uint16_t RegTypeCache::primitive_count_ = 0;
+PreciseConstType* RegTypeCache::small_precise_constants_[kMaxSmallConstant - kMinSmallConstant + 1];
 
 static bool MatchingPrecisionForClass(RegType* entry, bool precise)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -44,7 +44,7 @@
   }
 }
 
-void RegTypeCache::FillPrimitiveTypes() {
+void RegTypeCache::FillPrimitiveAndSmallConstantTypes() {
   entries_.push_back(UndefinedType::GetInstance());
   entries_.push_back(ConflictType::GetInstance());
   entries_.push_back(BooleanType::GetInstance());
@@ -57,6 +57,11 @@
   entries_.push_back(FloatType::GetInstance());
   entries_.push_back(DoubleLoType::GetInstance());
   entries_.push_back(DoubleHiType::GetInstance());
+  for (int32_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
+    int32_t i = value - kMinSmallConstant;
+    DCHECK_EQ(entries_.size(), small_precise_constants_[i]->GetId());
+    entries_.push_back(small_precise_constants_[i]);
+  }
   DCHECK_EQ(entries_.size(), primitive_count_);
 }
 
@@ -205,6 +210,7 @@
 }
 
 const RegType& RegTypeCache::FromClass(const char* descriptor, mirror::Class* klass, bool precise) {
+  DCHECK(klass != nullptr);
   if (klass->IsPrimitive()) {
     // Note: precise isn't used for primitive classes. A char is assignable to an int. All
     // primitive classes are final.
@@ -232,12 +238,12 @@
 RegTypeCache::~RegTypeCache() {
   CHECK_LE(primitive_count_, entries_.size());
   // Delete only the non primitive types.
-  if (entries_.size() == kNumPrimitives) {
-    // All entries are primitive, nothing to delete.
+  if (entries_.size() == kNumPrimitivesAndSmallConstants) {
+    // All entries are from the global pool, nothing to delete.
     return;
   }
   std::vector<RegType*>::iterator non_primitive_begin = entries_.begin();
-  std::advance(non_primitive_begin, kNumPrimitives);
+  std::advance(non_primitive_begin, kNumPrimitivesAndSmallConstants);
   STLDeleteContainerPointers(non_primitive_begin, entries_.end());
 }
 
@@ -255,12 +261,29 @@
     FloatType::Destroy();
     DoubleLoType::Destroy();
     DoubleHiType::Destroy();
+    for (int32_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
+      PreciseConstType* type = small_precise_constants_[value - kMinSmallConstant];
+      delete type;
+    }
+
     RegTypeCache::primitive_initialized_ = false;
     RegTypeCache::primitive_count_ = 0;
   }
 }
 
-void RegTypeCache::CreatePrimitiveTypes() {
+template <class Type>
+Type* RegTypeCache::CreatePrimitiveTypeInstance(const std::string& descriptor) {
+  mirror::Class* klass = NULL;
+  // Try loading the class from linker.
+  if (!descriptor.empty()) {
+    klass = art::Runtime::Current()->GetClassLinker()->FindSystemClass(descriptor.c_str());
+  }
+  Type* entry = Type::CreateInstance(klass, descriptor, RegTypeCache::primitive_count_);
+  RegTypeCache::primitive_count_++;
+  return entry;
+}
+
+void RegTypeCache::CreatePrimitiveAndSmallConstantTypes() {
   CreatePrimitiveTypeInstance<UndefinedType>("");
   CreatePrimitiveTypeInstance<ConflictType>("");
   CreatePrimitiveTypeInstance<BooleanType>("Z");
@@ -273,6 +296,11 @@
   CreatePrimitiveTypeInstance<FloatType>("F");
   CreatePrimitiveTypeInstance<DoubleLoType>("D");
   CreatePrimitiveTypeInstance<DoubleHiType>("D");
+  for (int32_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
+    PreciseConstType* type = new PreciseConstType(value, primitive_count_);
+    small_precise_constants_[value - kMinSmallConstant] = type;
+    primitive_count_++;
+  }
 }
 
 const RegType& RegTypeCache::FromUnresolvedMerge(const RegType& left, const RegType& right) {
@@ -331,29 +359,28 @@
   return *entry;
 }
 
-const RegType& RegTypeCache::Uninitialized(const RegType& type, uint32_t allocation_pc) {
-  RegType* entry = NULL;
-  RegType* cur_entry = NULL;
+const UninitializedType& RegTypeCache::Uninitialized(const RegType& type, uint32_t allocation_pc) {
+  UninitializedType* entry = NULL;
   const std::string& descriptor(type.GetDescriptor());
   if (type.IsUnresolvedTypes()) {
     for (size_t i = primitive_count_; i < entries_.size(); i++) {
-      cur_entry = entries_[i];
+      RegType* cur_entry = entries_[i];
       if (cur_entry->IsUnresolvedAndUninitializedReference() &&
           down_cast<UnresolvedUninitializedRefType*>(cur_entry)->GetAllocationPc() == allocation_pc &&
           (cur_entry->GetDescriptor() == descriptor)) {
-        return *cur_entry;
+        return *down_cast<UnresolvedUninitializedRefType*>(cur_entry);
       }
     }
     entry = new UnresolvedUninitializedRefType(descriptor, allocation_pc, entries_.size());
   } else {
     mirror::Class* klass = type.GetClass();
     for (size_t i = primitive_count_; i < entries_.size(); i++) {
-      cur_entry = entries_[i];
+      RegType* cur_entry = entries_[i];
       if (cur_entry->IsUninitializedReference() &&
           down_cast<UninitializedReferenceType*>(cur_entry)
               ->GetAllocationPc() == allocation_pc &&
           cur_entry->GetClass() == klass) {
-        return *cur_entry;
+        return *down_cast<UninitializedReferenceType*>(cur_entry);
       }
     }
     entry = new UninitializedReferenceType(klass, descriptor, allocation_pc, entries_.size());
@@ -404,27 +431,33 @@
   return *entry;
 }
 
-const RegType& RegTypeCache::ByteConstant() {
-  return FromCat1Const(std::numeric_limits<jbyte>::min(), false);
+const ImpreciseConstType& RegTypeCache::ByteConstant() {
+  const ConstantType& result = FromCat1Const(std::numeric_limits<jbyte>::min(), false);
+  DCHECK(result.IsImpreciseConstant());
+  return *down_cast<const ImpreciseConstType*>(&result);
 }
 
-const RegType& RegTypeCache::ShortConstant() {
-  return FromCat1Const(std::numeric_limits<jshort>::min(), false);
+const ImpreciseConstType& RegTypeCache::ShortConstant() {
+  const ConstantType& result = FromCat1Const(std::numeric_limits<jshort>::min(), false);
+  DCHECK(result.IsImpreciseConstant());
+  return *down_cast<const ImpreciseConstType*>(&result);
 }
 
-const RegType& RegTypeCache::IntConstant() {
-  return FromCat1Const(std::numeric_limits<jint>::max(), false);
+const ImpreciseConstType& RegTypeCache::IntConstant() {
+  const ConstantType& result = FromCat1Const(std::numeric_limits<jint>::max(), false);
+  DCHECK(result.IsImpreciseConstant());
+  return *down_cast<const ImpreciseConstType*>(&result);
 }
 
-const RegType& RegTypeCache::UninitializedThisArgument(const RegType& type) {
-  RegType* entry;
+const UninitializedType& RegTypeCache::UninitializedThisArgument(const RegType& type) {
+  UninitializedType* entry;
   const std::string& descriptor(type.GetDescriptor());
   if (type.IsUnresolvedTypes()) {
     for (size_t i = primitive_count_; i < entries_.size(); i++) {
       RegType* cur_entry = entries_[i];
       if (cur_entry->IsUnresolvedAndUninitializedThisReference() &&
           cur_entry->GetDescriptor() == descriptor) {
-        return *cur_entry;
+        return *down_cast<UninitializedType*>(cur_entry);
       }
     }
     entry = new UnresolvedUninitializedThisRefType(descriptor, entries_.size());
@@ -433,7 +466,7 @@
     for (size_t i = primitive_count_; i < entries_.size(); i++) {
       RegType* cur_entry = entries_[i];
       if (cur_entry->IsUninitializedThisReference() && cur_entry->GetClass() == klass) {
-        return *cur_entry;
+        return *down_cast<UninitializedType*>(cur_entry);
       }
     }
     entry = new UninitializedThisReferenceType(klass, descriptor, entries_.size());
@@ -442,16 +475,16 @@
   return *entry;
 }
 
-const RegType& RegTypeCache::FromCat1Const(int32_t value, bool precise) {
+const ConstantType& RegTypeCache::FromCat1NonSmallConstant(int32_t value, bool precise) {
   for (size_t i = primitive_count_; i < entries_.size(); i++) {
     RegType* cur_entry = entries_[i];
     if (cur_entry->klass_ == NULL && cur_entry->IsConstant() &&
         cur_entry->IsPreciseConstant() == precise &&
         (down_cast<ConstantType*>(cur_entry))->ConstantValue() == value) {
-      return *cur_entry;
+      return *down_cast<ConstantType*>(cur_entry);
     }
   }
-  RegType* entry;
+  ConstantType* entry;
   if (precise) {
     entry = new PreciseConstType(value, entries_.size());
   } else {
@@ -461,15 +494,15 @@
   return *entry;
 }
 
-const RegType& RegTypeCache::FromCat2ConstLo(int32_t value, bool precise) {
+const ConstantType& RegTypeCache::FromCat2ConstLo(int32_t value, bool precise) {
   for (size_t i = primitive_count_; i < entries_.size(); i++) {
     RegType* cur_entry = entries_[i];
     if (cur_entry->IsConstantLo() && (cur_entry->IsPrecise() == precise) &&
         (down_cast<ConstantType*>(cur_entry))->ConstantValueLo() == value) {
-      return *cur_entry;
+      return *down_cast<ConstantType*>(cur_entry);
     }
   }
-  RegType* entry;
+  ConstantType* entry;
   if (precise) {
     entry = new PreciseConstLoType(value, entries_.size());
   } else {
@@ -479,15 +512,15 @@
   return *entry;
 }
 
-const RegType& RegTypeCache::FromCat2ConstHi(int32_t value, bool precise) {
+const ConstantType& RegTypeCache::FromCat2ConstHi(int32_t value, bool precise) {
   for (size_t i = primitive_count_; i < entries_.size(); i++) {
     RegType* cur_entry = entries_[i];
     if (cur_entry->IsConstantHi() && (cur_entry->IsPrecise() == precise) &&
         (down_cast<ConstantType*>(cur_entry))->ConstantValueHi() == value) {
-      return *cur_entry;
+      return *down_cast<ConstantType*>(cur_entry);
     }
   }
-  RegType* entry;
+  ConstantType* entry;
   if (precise) {
     entry = new PreciseConstHiType(value, entries_.size());
   } else {
@@ -498,8 +531,9 @@
 }
 
 const RegType& RegTypeCache::GetComponentType(const RegType& array, mirror::ClassLoader* loader) {
-  CHECK(array.IsArrayTypes());
-  if (array.IsUnresolvedTypes()) {
+  if (!array.IsArrayTypes()) {
+    return Conflict();
+  } else if (array.IsUnresolvedTypes()) {
     const std::string& descriptor(array.GetDescriptor());
     const std::string component(descriptor.substr(1, descriptor.size() - 1));
     return FromDescriptor(loader, component.c_str(), false);
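
GetComponentType above also stops CHECK-failing on non-array inputs and answers Conflict() instead, since the new aput-object handling can reach it with arbitrary value types. For unresolved arrays the component type is derived purely from the descriptor by trimming one leading '[', sketched standalone below (the function name is ours):

    #include <cassert>
    #include <string>

    // Dropping the leading '[' of an array descriptor yields the component's
    // descriptor; multi-dimensional arrays peel one level per call.
    static std::string ComponentDescriptor(const std::string& array_descriptor) {
      assert(array_descriptor[0] == '[');
      return array_descriptor.substr(1);
    }

    int main() {
      assert(ComponentDescriptor("[Ljava/lang/String;") == "Ljava/lang/String;");
      assert(ComponentDescriptor("[[I") == "[I");
      return 0;
    }
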
diff --git a/runtime/verifier/reg_type_cache.h b/runtime/verifier/reg_type_cache.h
index 77f5893..a9f8bff 100644
--- a/runtime/verifier/reg_type_cache.h
+++ b/runtime/verifier/reg_type_cache.h
@@ -35,19 +35,18 @@
 
 class RegType;
 
-const size_t kNumPrimitives = 12;
 class RegTypeCache {
  public:
   explicit RegTypeCache(bool can_load_classes) : can_load_classes_(can_load_classes) {
     entries_.reserve(64);
-    FillPrimitiveTypes();
+    FillPrimitiveAndSmallConstantTypes();
   }
   ~RegTypeCache();
   static void Init() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (!RegTypeCache::primitive_initialized_) {
       CHECK_EQ(RegTypeCache::primitive_count_, 0);
-      CreatePrimitiveTypes();
-      CHECK_EQ(RegTypeCache::primitive_count_, kNumPrimitives);
+      CreatePrimitiveAndSmallConstantTypes();
+      CHECK_EQ(RegTypeCache::primitive_count_, kNumPrimitivesAndSmallConstants);
       RegTypeCache::primitive_initialized_ = true;
     }
   }
@@ -55,17 +54,13 @@
   const art::verifier::RegType& GetFromId(uint16_t id) const;
   const RegType& From(mirror::ClassLoader* loader, const char* descriptor, bool precise)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template <class Type>
-  static Type* CreatePrimitiveTypeInstance(const std::string& descriptor)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FillPrimitiveTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   const RegType& FromClass(const char* descriptor, mirror::Class* klass, bool precise)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  const RegType& FromCat1Const(int32_t value, bool precise)
+  const ConstantType& FromCat1Const(int32_t value, bool precise)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  const RegType& FromCat2ConstLo(int32_t value, bool precise)
+  const ConstantType& FromCat2ConstLo(int32_t value, bool precise)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  const RegType& FromCat2ConstHi(int32_t value, bool precise)
+  const ConstantType& FromCat2ConstHi(int32_t value, bool precise)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   const RegType& FromDescriptor(mirror::ClassLoader* loader, const char* descriptor, bool precise)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -129,34 +124,56 @@
   const RegType& JavaLangObject(bool precise) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return From(NULL, "Ljava/lang/Object;", precise);
   }
-  const RegType& Uninitialized(const RegType& type, uint32_t allocation_pc)
+  const UninitializedType& Uninitialized(const RegType& type, uint32_t allocation_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Create an uninitialized 'this' argument for the given type.
-  const RegType& UninitializedThisArgument(const RegType& type)
+  const UninitializedType& UninitializedThisArgument(const RegType& type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   const RegType& FromUninitialized(const RegType& uninit_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  const RegType& ByteConstant() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  const RegType& ShortConstant() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  const RegType& IntConstant() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const ImpreciseConstType& ByteConstant() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const ImpreciseConstType& ShortConstant() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const ImpreciseConstType& IntConstant() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   const RegType& GetComponentType(const RegType& array, mirror::ClassLoader* loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void Dump(std::ostream& os) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   const RegType& RegTypeFromPrimitiveType(Primitive::Type) const;
 
  private:
-  std::vector<RegType*> entries_;
-  static bool primitive_initialized_;
-  static uint16_t primitive_start_;
-  static uint16_t primitive_count_;
-  static void CreatePrimitiveTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  // Whether or not we're allowed to load classes.
-  const bool can_load_classes_;
+  void FillPrimitiveAndSmallConstantTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::Class* ResolveClass(const char* descriptor, mirror::ClassLoader* loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ClearException();
   bool MatchDescriptor(size_t idx, const char* descriptor, bool precise)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const ConstantType& FromCat1NonSmallConstant(int32_t value, bool precise)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <class Type>
+  static Type* CreatePrimitiveTypeInstance(const std::string& descriptor)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void CreatePrimitiveAndSmallConstantTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // The actual storage for the RegTypes.
+  std::vector<RegType*> entries_;
+
+  // A quick lookup table for popular small constants.
+  static constexpr int32_t kMinSmallConstant = -1;
+  static constexpr int32_t kMaxSmallConstant = 4;
+  static PreciseConstType* small_precise_constants_[kMaxSmallConstant - kMinSmallConstant + 1];
+
+  static constexpr size_t kNumPrimitivesAndSmallConstants =
+      12 + (kMaxSmallConstant - kMinSmallConstant + 1);
+
+  // Have the well-known global primitive and small-constant types been created?
+  static bool primitive_initialized_;
+
+  // Number of well-known primitive and small-constant types copied into a RegTypeCache on
+  // construction.
+  static uint16_t primitive_count_;
+
+  // Whether or not we're allowed to load classes.
+  const bool can_load_classes_;
+
   DISALLOW_COPY_AND_ASSIGN(RegTypeCache);
 };
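
The small_precise_constants_ table above exists because constants in [-1, 4] dominate real dex code (array indices, loop bounds, booleans), so FromCat1Const can hand back a shared global entry without scanning entries_. A standalone sketch of the fast/slow split (ours; a plain struct stands in for PreciseConstType and the precise/imprecise distinction is elided):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct ConstType { int32_t value; };  // Stand-in for PreciseConstType.

    static constexpr int32_t kMinSmall = -1;
    static constexpr int32_t kMaxSmall = 4;
    static ConstType* small_constants[kMaxSmall - kMinSmall + 1];
    static std::vector<ConstType*> entries;  // Stand-in for entries_.

    static void InitSmallConstants() {
      for (int32_t v = kMinSmall; v <= kMaxSmall; ++v) {
        small_constants[v - kMinSmall] = new ConstType{v};
      }
    }

    static ConstType& FromConstant(int32_t value) {
      if (value >= kMinSmall && value <= kMaxSmall) {
        return *small_constants[value - kMinSmall];  // O(1) global table hit.
      }
      for (ConstType* e : entries) {  // Slow path: search, then allocate.
        if (e->value == value) return *e;
      }
      entries.push_back(new ConstType{value});
      return *entries.back();
    }

    int main() {
      InitSmallConstants();
      assert(&FromConstant(0) == &FromConstant(0));        // Shared entry.
      assert(&FromConstant(1000) == &FromConstant(1000));  // Cached slow path.
      return 0;
    }
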
 
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index a615cc1..1a41657 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -456,8 +456,7 @@
 
 bool RegisterLine::MergeRegisters(const RegisterLine* incoming_line) {
   bool changed = false;
-  CHECK(NULL != incoming_line);
-  CHECK(NULL != line_.get());
+  DCHECK(incoming_line != nullptr);
   for (size_t idx = 0; idx < num_regs_; idx++) {
     if (line_[idx] != incoming_line->line_[idx]) {
       const RegType& incoming_reg_type = incoming_line->GetRegisterType(idx);
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index f19dcca..8b2dadb 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -17,7 +17,6 @@
 #ifndef ART_RUNTIME_VERIFIER_REGISTER_LINE_H_
 #define ART_RUNTIME_VERIFIER_REGISTER_LINE_H_
 
-#include <deque>
 #include <vector>
 
 #include "dex_instruction.h"
@@ -51,12 +50,10 @@
 // stack of entered monitors (identified by code unit offset).
 class RegisterLine {
  public:
-  RegisterLine(size_t num_regs, MethodVerifier* verifier)
-      : line_(new uint16_t[num_regs]),
-        verifier_(verifier),
-        num_regs_(num_regs) {
-    memset(line_.get(), 0, num_regs_ * sizeof(uint16_t));
-    SetResultTypeToUnknown();
+  static RegisterLine* Create(size_t num_regs, MethodVerifier* verifier) {
+    uint8_t* memory = new uint8_t[sizeof(RegisterLine) + (num_regs * sizeof(uint16_t))];
+    RegisterLine* rl = new (memory) RegisterLine(num_regs, verifier);
+    return rl;
   }
 
   // Implement category-1 "move" instructions. Copy a 32-bit value from "vsrc" to "vdst".
@@ -108,7 +105,7 @@
 
   void CopyFromLine(const RegisterLine* src) {
     DCHECK_EQ(num_regs_, src->num_regs_);
-    memcpy(line_.get(), src->line_.get(), num_regs_ * sizeof(uint16_t));
+    memcpy(&line_, &src->line_, num_regs_ * sizeof(uint16_t));
     monitors_ = src->monitors_;
     reg_to_lock_depths_ = src->reg_to_lock_depths_;
   }
@@ -116,7 +113,7 @@
   std::string Dump() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void FillWithGarbage() {
-    memset(line_.get(), 0xf1, num_regs_ * sizeof(uint16_t));
+    memset(&line_, 0xf1, num_regs_ * sizeof(uint16_t));
     while (!monitors_.empty()) {
       monitors_.pop_back();
     }
@@ -161,7 +158,7 @@
   int CompareLine(const RegisterLine* line2) const {
     DCHECK(monitors_ == line2->monitors_);
     // TODO: DCHECK(reg_to_lock_depths_ == line2->reg_to_lock_depths_);
-    return memcmp(line_.get(), line2->line_.get(), num_regs_ * sizeof(uint16_t));
+    return memcmp(&line_, &line2->line_, num_regs_ * sizeof(uint16_t));
   }
 
   size_t NumRegs() const {
@@ -339,23 +336,30 @@
     reg_to_lock_depths_.erase(reg);
   }
 
+  RegisterLine(size_t num_regs, MethodVerifier* verifier)
+      : verifier_(verifier),
+        num_regs_(num_regs) {
+    memset(&line_, 0, num_regs_ * sizeof(uint16_t));
+    SetResultTypeToUnknown();
+  }
+
   // Storage for the result register's type, valid after an invocation
   uint16_t result_[2];
 
-  // An array of RegType Ids associated with each dex register
-  UniquePtr<uint16_t[]> line_;
-
   // Back link to the verifier
   MethodVerifier* verifier_;
 
   // Length of line_, i.e. the number of dex registers.
   const uint32_t num_regs_;
   // A stack of monitor enter locations
-  std::deque<uint32_t> monitors_;
+  std::vector<uint32_t> monitors_;
   // A map from register to a bit vector of indices into the monitors_ stack. As we pop the monitor
   // stack we verify that monitor-enter/exit are correctly nested. That is, if there was a
   // monitor-enter on v5 and then on v6, we expect the monitor-exit to be on v6 then on v5
   SafeMap<uint32_t, uint32_t> reg_to_lock_depths_;
+
+  // An array of RegType Ids associated with each dex register.
+  uint16_t line_[0];
 };
 std::ostream& operator<<(std::ostream& os, const RegisterLine& rhs);
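
RegisterLine now folds its register array into the object itself: Create sizes one block for the header plus num_regs entries and placement-news the object into it, with the trailing line_[0] member naming the array that follows. The payoff is one allocation per line instead of two and contiguous storage, which is what lets CopyFromLine, FillWithGarbage, and CompareLine operate on &line_ directly rather than through line_.get(). A self-contained sketch of the pattern, including the teardown half the diff does not show; class and member names here are illustrative:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <new>

    class Line {
     public:
      static Line* Create(size_t num_regs) {
        // One allocation covers the header and the per-register array.
        uint8_t* memory = new uint8_t[sizeof(Line) + num_regs * sizeof(uint16_t)];
        return new (memory) Line(num_regs);
      }

      static void Destroy(Line* line) {
        // Placement new requires an explicit destructor call, then
        // freeing the raw buffer with the matching delete[].
        line->~Line();
        delete[] reinterpret_cast<uint8_t*>(line);
      }

      uint16_t Get(size_t i) const { return regs_[i]; }

     private:
      explicit Line(size_t num_regs) : num_regs_(num_regs) {
        memset(regs_, 0, num_regs_ * sizeof(uint16_t));
      }

      const uint32_t num_regs_;
      uint16_t regs_[0];  // Trailing array (GNU extension, as with line_[0] above).
    };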
 
diff --git a/test/Android.mk b/test/Android.mk
index da469d7..cdd61f0 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -23,8 +23,8 @@
 TEST_DEX_DIRECTORIES := \
 	AbstractMethod \
 	AllFields \
-	CreateMethodSignature \
 	ExceptionHandle \
+	GetMethodSignature \
 	Interfaces \
 	Main \
 	MyClass \
diff --git a/test/CreateMethodSignature/CreateMethodSignature.java b/test/GetMethodSignature/GetMethodSignature.java
similarity index 86%
rename from test/CreateMethodSignature/CreateMethodSignature.java
rename to test/GetMethodSignature/GetMethodSignature.java
index f6cd6ae..c2ba948 100644
--- a/test/CreateMethodSignature/CreateMethodSignature.java
+++ b/test/GetMethodSignature/GetMethodSignature.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-class CreateMethodSignature {
+class GetMethodSignature {
     Float m1(int a, double b, long c, Object d) { return null; }
-    CreateMethodSignature m2(boolean x, short y, char z) { return null; }
+    GetMethodSignature m2(boolean x, short y, char z) { return null; }
 }
diff --git a/test/run-test b/test/run-test
index 11dcfc5..c449e84 100755
--- a/test/run-test
+++ b/test/run-test
@@ -269,7 +269,7 @@
         fi
     fi
     # Clean up extraneous files that are not used by tests.
-    find $tmp_dir -mindepth 1  ! -regex ".*/\(.*jar\|$build_output\|$expected\)" | xargs rm -rf
+    find $tmp_dir -mindepth 1  ! -regex ".*/\(.*jar\|$output\|$expected\)" | xargs rm -rf
     exit 0
 else
     "./${build}" >"$build_output" 2>&1