189 files changed, 4716 insertions, 3599 deletions
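The largest structural addition in this diff is the LinkerPatch value class in compiler/compiled_method.h: each patch records the literal offset in the compiled code, the patch kind, and the target method or type, and CompiledMethod now carries them as a std::vector<LinkerPatch>. The standalone sketch below only illustrates that pattern (per-kind factory functions plus a total order so a writer can sort patches by code offset); DexFile is reduced to an opaque stand-in and only two of the four factories are shown, so this is not the real ART code.

// Standalone sketch of the LinkerPatch pattern added in compiler/compiled_method.h.
// DexFile is an opaque stand-in, not the real ART class.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <tuple>
#include <vector>

struct DexFile {};  // Stand-in: patches only store and compare the pointer.

enum LinkerPatchType {
  kLinkerPatchMethod,
  kLinkerPatchCall,
  kLinkerPatchCallRelative,  // Actual patching is instruction_set-dependent.
  kLinkerPatchType,
};

class LinkerPatch {
 public:
  static LinkerPatch MethodPatch(size_t literal_offset,
                                 const DexFile* dex_file,
                                 uint32_t method_idx) {
    return LinkerPatch(literal_offset, kLinkerPatchMethod, method_idx, dex_file);
  }

  static LinkerPatch RelativeCodePatch(size_t literal_offset,
                                       const DexFile* dex_file,
                                       uint32_t method_idx) {
    return LinkerPatch(literal_offset, kLinkerPatchCallRelative, method_idx, dex_file);
  }

  size_t LiteralOffset() const { return literal_offset_; }
  LinkerPatchType Type() const { return patch_type_; }

  // Total order so an oat/ELF writer can sort patches by code offset.
  friend bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs) {
    return std::tie(lhs.literal_offset_, lhs.patch_type_, lhs.target_idx_, lhs.target_dex_file_) <
           std::tie(rhs.literal_offset_, rhs.patch_type_, rhs.target_idx_, rhs.target_dex_file_);
  }

 private:
  LinkerPatch(size_t literal_offset, LinkerPatchType patch_type,
              uint32_t target_idx, const DexFile* target_dex_file)
      : literal_offset_(literal_offset), patch_type_(patch_type),
        target_idx_(target_idx), target_dex_file_(target_dex_file) {}

  size_t literal_offset_;
  LinkerPatchType patch_type_;
  uint32_t target_idx_;             // Method index or type index, depending on the kind.
  const DexFile* target_dex_file_;
};

int main() {
  DexFile dex;
  // A backend would append one patch per literal it wants the linker to fix up.
  std::vector<LinkerPatch> patches = {
      LinkerPatch::RelativeCodePatch(/*literal_offset=*/24u, &dex, /*method_idx=*/7u),
      LinkerPatch::MethodPatch(/*literal_offset=*/8u, &dex, /*method_idx=*/3u),
  };
  std::sort(patches.begin(), patches.end());
  for (const LinkerPatch& p : patches) {
    std::printf("patch at +%zu, kind %d\n", p.LiteralOffset(), static_cast<int>(p.Type()));
  }
  return 0;
}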
diff --git a/Android.mk b/Android.mk index f8d46a4e3a..1edd543eb9 100644 --- a/Android.mk +++ b/Android.mk @@ -24,15 +24,16 @@ art_path := $(LOCAL_PATH) include $(art_path)/build/Android.common_path.mk -# following the example of build's dont_bother for clean targets -ifneq (,$(filter clean-oat,$(MAKECMDGOALS))) -art_dont_bother := true +# Following the example of build's dont_bother for clean targets. +art_dont_bother := false +ifneq (,$(filter clean-oat%,$(MAKECMDGOALS))) + art_dont_bother := true endif -ifneq (,$(filter clean-oat-host,$(MAKECMDGOALS))) -art_dont_bother := true -endif -ifneq (,$(filter clean-oat-target,$(MAKECMDGOALS))) -art_dont_bother := true + +# Don't bother with tests unless there is a test-art* or build-art* target. +art_test_bother := false +ifneq (,$(filter %tests test-art% build-art%,$(MAKECMDGOALS))) + art_test_bother := true endif .PHONY: clean-oat @@ -131,6 +132,8 @@ endif ######################################################################## # test rules +ifeq ($(art_test_bother),true) + # All the dependencies that must be built ahead of sync-ing them onto the target device. TEST_ART_TARGET_SYNC_DEPS := @@ -300,6 +303,8 @@ test-art-target-interpreter$(2ND_ART_PHONY_TEST_TARGET_SUFFIX): test-art-target- $(hide) $(call ART_TEST_PREREQ_FINISHED,$@) endif +endif # art_test_bother + ######################################################################## # oat-target and oat-target-sync rules @@ -452,3 +457,7 @@ use-art-verify-none: ######################################################################## endif # !art_dont_bother + +# Clear locally used variables. +art_dont_bother := +art_test_bother := diff --git a/compiler/Android.mk b/compiler/Android.mk index 7ac1c6b377..8b5e6d58f8 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -95,6 +95,7 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/graph_checker.cc \ optimizing/graph_visualizer.cc \ optimizing/gvn.cc \ + optimizing/instruction_simplifier.cc \ optimizing/locations.cc \ optimizing/nodes.cc \ optimizing/optimizing_compiler.cc \ @@ -125,7 +126,6 @@ LIBART_COMPILER_SRC_FILES := \ buffered_output_stream.cc \ compiler.cc \ elf_fixup.cc \ - elf_patcher.cc \ elf_stripper.cc \ elf_writer.cc \ elf_writer_quick.cc \ diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index ba5bd30b01..698bf3b670 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -152,14 +152,16 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver, const std::vector<uint8_t>& mapping_table, const std::vector<uint8_t>& vmap_table, const std::vector<uint8_t>& native_gc_map, - const std::vector<uint8_t>* cfi_info) + const std::vector<uint8_t>* cfi_info, + const ArrayRef<LinkerPatch>& patches) : CompiledCode(driver, instruction_set, quick_code), frame_size_in_bytes_(frame_size_in_bytes), core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask), src_mapping_table_(driver->DeduplicateSrcMappingTable(src_mapping_table->Arrange())), mapping_table_(driver->DeduplicateMappingTable(mapping_table)), vmap_table_(driver->DeduplicateVMapTable(vmap_table)), gc_map_(driver->DeduplicateGCMap(native_gc_map)), - cfi_info_(driver->DeduplicateCFIInfo(cfi_info)) { + cfi_info_(driver->DeduplicateCFIInfo(cfi_info)), + patches_(patches.begin(), patches.end()) { } CompiledMethod::CompiledMethod(CompilerDriver* driver, @@ -178,7 +180,8 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver, mapping_table_(driver->DeduplicateMappingTable(mapping_table)), vmap_table_(driver->DeduplicateVMapTable(stack_map)), 
gc_map_(nullptr), - cfi_info_(nullptr) { + cfi_info_(nullptr), + patches_() { } CompiledMethod::CompiledMethod(CompilerDriver* driver, @@ -195,7 +198,8 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver, mapping_table_(driver->DeduplicateMappingTable(std::vector<uint8_t>())), vmap_table_(driver->DeduplicateVMapTable(std::vector<uint8_t>())), gc_map_(driver->DeduplicateGCMap(std::vector<uint8_t>())), - cfi_info_(driver->DeduplicateCFIInfo(cfi_info)) { + cfi_info_(driver->DeduplicateCFIInfo(cfi_info)), + patches_() { } // Constructs a CompiledMethod for the Portable compiler. @@ -208,7 +212,9 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver, InstructionSet instructio src_mapping_table_(driver->DeduplicateSrcMappingTable(SrcMap())), mapping_table_(driver->DeduplicateMappingTable(std::vector<uint8_t>())), vmap_table_(driver->DeduplicateVMapTable(std::vector<uint8_t>())), - gc_map_(driver->DeduplicateGCMap(gc_map)) { + gc_map_(driver->DeduplicateGCMap(gc_map)), + cfi_info_(nullptr), + patches_() { } CompiledMethod::CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set, @@ -219,7 +225,9 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver, InstructionSet instructio src_mapping_table_(driver->DeduplicateSrcMappingTable(SrcMap())), mapping_table_(driver->DeduplicateMappingTable(std::vector<uint8_t>())), vmap_table_(driver->DeduplicateVMapTable(std::vector<uint8_t>())), - gc_map_(driver->DeduplicateGCMap(std::vector<uint8_t>())) { + gc_map_(driver->DeduplicateGCMap(std::vector<uint8_t>())), + cfi_info_(nullptr), + patches_() { } } // namespace art diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index cc46b92dc5..cdae8d2d24 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -22,7 +22,9 @@ #include <vector> #include "instruction_set.h" +#include "method_reference.h" #include "utils.h" +#include "utils/array_ref.h" namespace llvm { class Function; @@ -171,6 +173,101 @@ class SrcMap FINAL : public std::vector<SrcMapElem> { } }; +enum LinkerPatchType { + kLinkerPatchMethod, + kLinkerPatchCall, + kLinkerPatchCallRelative, // NOTE: Actual patching is instruction_set-dependent. 
+ kLinkerPatchType, +}; + +class LinkerPatch { + public: + static LinkerPatch MethodPatch(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t target_method_idx) { + return LinkerPatch(literal_offset, kLinkerPatchMethod, + target_method_idx, target_dex_file); + } + + static LinkerPatch CodePatch(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t target_method_idx) { + return LinkerPatch(literal_offset, kLinkerPatchCall, + target_method_idx, target_dex_file); + } + + static LinkerPatch RelativeCodePatch(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t target_method_idx) { + return LinkerPatch(literal_offset, kLinkerPatchCallRelative, + target_method_idx, target_dex_file); + } + + static LinkerPatch TypePatch(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t target_type_idx) { + return LinkerPatch(literal_offset, kLinkerPatchType, target_type_idx, target_dex_file); + } + + LinkerPatch(const LinkerPatch& other) = default; + LinkerPatch& operator=(const LinkerPatch& other) = default; + + size_t LiteralOffset() const { + return literal_offset_; + } + + LinkerPatchType Type() const { + return patch_type_; + } + + MethodReference TargetMethod() const { + DCHECK(patch_type_ == kLinkerPatchMethod || + patch_type_ == kLinkerPatchCall || patch_type_ == kLinkerPatchCallRelative); + return MethodReference(target_dex_file_, target_idx_); + } + + const DexFile* TargetTypeDexFile() const { + DCHECK(patch_type_ == kLinkerPatchType); + return target_dex_file_; + } + + uint32_t TargetTypeIndex() const { + DCHECK(patch_type_ == kLinkerPatchType); + return target_idx_; + } + + private: + LinkerPatch(size_t literal_offset, LinkerPatchType patch_type, + uint32_t target_idx, const DexFile* target_dex_file) + : literal_offset_(literal_offset), + patch_type_(patch_type), + target_idx_(target_idx), + target_dex_file_(target_dex_file) { + } + + size_t literal_offset_; + LinkerPatchType patch_type_; + uint32_t target_idx_; // Method index (Call/Method patches) or type index (Type patches). + const DexFile* target_dex_file_; + + friend bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs); + friend bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs); +}; + +inline bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs) { + return lhs.literal_offset_ == rhs.literal_offset_ && + lhs.patch_type_ == rhs.patch_type_ && + lhs.target_idx_ == rhs.target_idx_ && + lhs.target_dex_file_ == rhs.target_dex_file_; +} + +inline bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs) { + return (lhs.literal_offset_ != rhs.literal_offset_) ? lhs.literal_offset_ < rhs.literal_offset_ + : (lhs.patch_type_ != rhs.patch_type_) ? lhs.patch_type_ < rhs.patch_type_ + : (lhs.target_idx_ != rhs.target_idx_) ? lhs.target_idx_ < rhs.target_idx_ + : lhs.target_dex_file_ < rhs.target_dex_file_; +} + class CompiledMethod FINAL : public CompiledCode { public: // Constructs a CompiledMethod for Quick. @@ -184,7 +281,8 @@ class CompiledMethod FINAL : public CompiledCode { const std::vector<uint8_t>& mapping_table, const std::vector<uint8_t>& vmap_table, const std::vector<uint8_t>& native_gc_map, - const std::vector<uint8_t>* cfi_info); + const std::vector<uint8_t>* cfi_info, + const ArrayRef<LinkerPatch>& patches = ArrayRef<LinkerPatch>()); // Constructs a CompiledMethod for Optimizing. 
CompiledMethod(CompilerDriver* driver, @@ -250,6 +348,10 @@ class CompiledMethod FINAL : public CompiledCode { return cfi_info_; } + const std::vector<LinkerPatch>& GetPatches() const { + return patches_; + } + private: // For quick code, the size of the activation used by the code. const size_t frame_size_in_bytes_; @@ -269,6 +371,8 @@ class CompiledMethod FINAL : public CompiledCode { std::vector<uint8_t>* gc_map_; // For quick code, a FDE entry for the debug_frame section. std::vector<uint8_t>* cfi_info_; + // For quick code, linker patches needed by the method. + std::vector<LinkerPatch> patches_; }; } // namespace art diff --git a/compiler/dex/compiler_ir.cc b/compiler/dex/compiler_ir.cc index ce48eb22b0..909c995c32 100644 --- a/compiler/dex/compiler_ir.cc +++ b/compiler/dex/compiler_ir.cc @@ -29,7 +29,6 @@ CompilationUnit::CompilationUnit(ArenaPool* pool) class_loader(nullptr), class_def_idx(0), method_idx(0), - code_item(nullptr), access_flags(0), invoke_type(kDirect), shorty(nullptr), diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h index c4e43fdf2c..37e3a7a592 100644 --- a/compiler/dex/compiler_ir.h +++ b/compiler/dex/compiler_ir.h @@ -58,7 +58,6 @@ struct CompilationUnit { jobject class_loader; // compiling method's class loader. uint16_t class_def_idx; // compiling method's defining class definition index. uint32_t method_idx; // compiling method's index into method_ids of DexFile. - const DexFile::CodeItem* code_item; // compiling method's DexFile code_item. uint32_t access_flags; // compiling method's access flags. InvokeType invoke_type; // compiling method's invocation type. const char* shorty; // compiling method's shorty. diff --git a/compiler/dex/dataflow_iterator-inl.h b/compiler/dex/dataflow_iterator-inl.h index d1abf7fa12..89f2b5c692 100644 --- a/compiler/dex/dataflow_iterator-inl.h +++ b/compiler/dex/dataflow_iterator-inl.h @@ -28,7 +28,7 @@ inline BasicBlock* DataflowIterator::ForwardSingleNext() { // Are we not yet at the end? if (idx_ < end_idx_) { // Get the next index. - BasicBlockId bb_id = block_id_list_->Get(idx_); + BasicBlockId bb_id = (*block_id_list_)[idx_]; res = mir_graph_->GetBasicBlock(bb_id); idx_++; } @@ -51,7 +51,7 @@ inline BasicBlock* DataflowIterator::ForwardRepeatNext() { // Are we not yet at the end? if (idx_ < end_idx_) { // Get the BasicBlockId. - BasicBlockId bb_id = block_id_list_->Get(idx_); + BasicBlockId bb_id = (*block_id_list_)[idx_]; res = mir_graph_->GetBasicBlock(bb_id); idx_++; } @@ -66,7 +66,7 @@ inline BasicBlock* DataflowIterator::ReverseSingleNext() { // Are we not yet at the end? if (idx_ >= 0) { // Get the BasicBlockId. - BasicBlockId bb_id = block_id_list_->Get(idx_); + BasicBlockId bb_id = (*block_id_list_)[idx_]; res = mir_graph_->GetBasicBlock(bb_id); idx_--; } @@ -89,7 +89,7 @@ inline BasicBlock* DataflowIterator::ReverseRepeatNext() { // Are we not yet done? if (idx_ >= 0) { // Get the BasicBlockId. - BasicBlockId bb_id = block_id_list_->Get(idx_); + BasicBlockId bb_id = (*block_id_list_)[idx_]; res = mir_graph_->GetBasicBlock(bb_id); idx_--; } @@ -97,34 +97,28 @@ inline BasicBlock* DataflowIterator::ReverseRepeatNext() { return res; } -// AllNodes uses the existing GrowableArray iterator, and should be considered unordered. +// AllNodes uses the existing block list, and should be considered unordered. inline BasicBlock* AllNodesIterator::Next(bool had_change) { - BasicBlock* res = NULL; - - // Suppose we want to keep looking. - bool keep_looking = true; - - // Find the next BasicBlock. 
- while (keep_looking == true) { - // Get next BasicBlock. - res = all_nodes_iterator_.Next(); + // Update changed: if had_changed is true, we remember it for the whole iteration. + changed_ |= had_change; - // Are we done or is the BasicBlock not hidden? - if ((res == NULL) || (res->hidden == false)) { - keep_looking = false; + BasicBlock* res = nullptr; + while (idx_ != end_idx_) { + BasicBlock* bb = mir_graph_->GetBlockList()[idx_++]; + DCHECK(bb != nullptr); + if (!bb->hidden) { + res = bb; + break; } } - // Update changed: if had_changed is true, we remember it for the whole iteration. - changed_ |= had_change; - return res; } inline BasicBlock* LoopRepeatingTopologicalSortIterator::Next(bool had_change) { if (idx_ != 0) { // Mark last processed block visited. - BasicBlock* bb = mir_graph_->GetBasicBlock(block_id_list_->Get(idx_ - 1)); + BasicBlock* bb = mir_graph_->GetBasicBlock((*block_id_list_)[idx_ - 1]); bb->visited = true; if (had_change) { // If we had a change we need to revisit the children. @@ -138,16 +132,17 @@ inline BasicBlock* LoopRepeatingTopologicalSortIterator::Next(bool had_change) { while (true) { // Pop loops we have left and check if we need to recalculate one of them. // NOTE: We need to do this even if idx_ == end_idx_. - while (loop_head_stack_->Size() != 0u && - loop_ends_->Get(loop_head_stack_->Peek().first) == idx_) { - auto top = loop_head_stack_->Peek(); + while (loop_head_stack_->size() != 0u && + (*loop_ends_)[loop_head_stack_->back().first] == idx_) { + auto top = loop_head_stack_->back(); uint16_t loop_head_idx = top.first; bool recalculated = top.second; - loop_head_stack_->Pop(); - BasicBlock* loop_head = mir_graph_->GetBasicBlock(block_id_list_->Get(loop_head_idx)); + loop_head_stack_->pop_back(); + BasicBlock* loop_head = mir_graph_->GetBasicBlock((*block_id_list_)[loop_head_idx]); DCHECK(loop_head != nullptr); if (!recalculated || !loop_head->visited) { - loop_head_stack_->Insert(std::make_pair(loop_head_idx, true)); // Recalculating this loop. + // Recalculating this loop. + loop_head_stack_->push_back(std::make_pair(loop_head_idx, true)); idx_ = loop_head_idx + 1; return loop_head; } @@ -160,11 +155,11 @@ inline BasicBlock* LoopRepeatingTopologicalSortIterator::Next(bool had_change) { // Get next block and return it if unvisited. BasicBlockId idx = idx_; idx_ += 1; - BasicBlock* bb = mir_graph_->GetBasicBlock(block_id_list_->Get(idx)); + BasicBlock* bb = mir_graph_->GetBasicBlock((*block_id_list_)[idx]); DCHECK(bb != nullptr); if (!bb->visited) { - if (loop_ends_->Get(idx) != 0u) { - loop_head_stack_->Insert(std::make_pair(idx, false)); // Not recalculating. + if ((*loop_ends_)[idx] != 0u) { + loop_head_stack_->push_back(std::make_pair(idx, false)); // Not recalculating. 
} return bb; } diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h index 06d6832340..188f1d9fff 100644 --- a/compiler/dex/dataflow_iterator.h +++ b/compiler/dex/dataflow_iterator.h @@ -104,7 +104,7 @@ namespace art { MIRGraph* const mir_graph_; /**< @brief the MIRGraph */ const int32_t start_idx_; /**< @brief the start index for the iteration */ const int32_t end_idx_; /**< @brief the last index for the iteration */ - GrowableArray<BasicBlockId>* block_id_list_; /**< @brief the list of BasicBlocks we want to iterate on */ + const ArenaVector<BasicBlockId>* block_id_list_; /**< @brief the list of BasicBlocks we want to iterate on */ int32_t idx_; /**< @brief Current index for the iterator */ int32_t repeats_; /**< @brief Number of repeats over the iteration */ bool changed_; /**< @brief Has something changed during the current iteration? */ @@ -124,7 +124,7 @@ namespace art { : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) { // Extra setup for the PreOrderDfsIterator. idx_ = start_idx_; - block_id_list_ = mir_graph->GetDfsOrder(); + block_id_list_ = &mir_graph->GetDfsOrder(); } /** @@ -155,7 +155,7 @@ namespace art { : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) { // Extra setup for the RepeatingPreOrderDfsIterator. idx_ = start_idx_; - block_id_list_ = mir_graph->GetDfsOrder(); + block_id_list_ = &mir_graph->GetDfsOrder(); } /** @@ -186,7 +186,7 @@ namespace art { : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) { // Extra setup for the RepeatingPostOrderDfsIterator. idx_ = start_idx_; - block_id_list_ = mir_graph->GetDfsPostOrder(); + block_id_list_ = &mir_graph->GetDfsPostOrder(); } /** @@ -216,7 +216,7 @@ namespace art { : DataflowIterator(mir_graph, mir_graph->GetNumReachableBlocks() -1, 0) { // Extra setup for the ReversePostOrderDfsIterator. idx_ = start_idx_; - block_id_list_ = mir_graph->GetDfsPostOrder(); + block_id_list_ = &mir_graph->GetDfsPostOrder(); } /** @@ -247,7 +247,7 @@ namespace art { : DataflowIterator(mir_graph, mir_graph->GetNumReachableBlocks() -1, 0) { // Extra setup for the RepeatingReversePostOrderDfsIterator idx_ = start_idx_; - block_id_list_ = mir_graph->GetDfsPostOrder(); + block_id_list_ = &mir_graph->GetDfsPostOrder(); } /** @@ -277,7 +277,7 @@ namespace art { : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) { // Extra setup for thePostOrderDOMIterator. idx_ = start_idx_; - block_id_list_ = mir_graph->GetDomPostOrder(); + block_id_list_ = &mir_graph->GetDomPostOrder(); } /** @@ -304,15 +304,14 @@ namespace art { * @param mir_graph The MIRGraph considered. */ explicit AllNodesIterator(MIRGraph* mir_graph) - : DataflowIterator(mir_graph, 0, 0), - all_nodes_iterator_(mir_graph->GetBlockList()) { + : DataflowIterator(mir_graph, 0, mir_graph->GetBlockList().size()) { } /** * @brief Resetting the iterator. */ void Reset() { - all_nodes_iterator_.Reset(); + idx_ = 0; } /** @@ -321,9 +320,6 @@ namespace art { * @return the next BasicBlock following the iteration order, 0 if finished. */ virtual BasicBlock* Next(bool had_change = false) ALWAYS_INLINE; - - private: - GrowableArray<BasicBlock*>::Iterator all_nodes_iterator_; /**< @brief The list of all the nodes */ }; /** @@ -337,10 +333,10 @@ namespace art { * @param mir_graph The MIRGraph considered. 
*/ explicit TopologicalSortIterator(MIRGraph* mir_graph) - : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder()->Size()) { + : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder().size()) { // Extra setup for TopologicalSortIterator. idx_ = start_idx_; - block_id_list_ = mir_graph->GetTopologicalSortOrder(); + block_id_list_ = &mir_graph->GetTopologicalSortOrder(); } /** @@ -369,10 +365,10 @@ namespace art { * @param mir_graph The MIRGraph considered. */ explicit RepeatingTopologicalSortIterator(MIRGraph* mir_graph) - : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder()->Size()) { + : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder().size()) { // Extra setup for RepeatingTopologicalSortIterator. idx_ = start_idx_; - block_id_list_ = mir_graph->GetTopologicalSortOrder(); + block_id_list_ = &mir_graph->GetTopologicalSortOrder(); } /** @@ -408,19 +404,19 @@ namespace art { * @param mir_graph The MIRGraph considered. */ explicit LoopRepeatingTopologicalSortIterator(MIRGraph* mir_graph) - : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder()->Size()), - loop_ends_(mir_graph->GetTopologicalSortOrderLoopEnds()), + : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder().size()), + loop_ends_(&mir_graph->GetTopologicalSortOrderLoopEnds()), loop_head_stack_(mir_graph_->GetTopologicalSortOrderLoopHeadStack()) { // Extra setup for RepeatingTopologicalSortIterator. idx_ = start_idx_; - block_id_list_ = mir_graph->GetTopologicalSortOrder(); + block_id_list_ = &mir_graph->GetTopologicalSortOrder(); // Clear visited flags and check that the loop head stack is empty. mir_graph->ClearAllVisitedFlags(); - DCHECK_EQ(loop_head_stack_->Size(), 0u); + DCHECK_EQ(loop_head_stack_->size(), 0u); } ~LoopRepeatingTopologicalSortIterator() { - DCHECK_EQ(loop_head_stack_->Size(), 0u); + DCHECK_EQ(loop_head_stack_->size(), 0u); } /** @@ -431,8 +427,8 @@ namespace art { virtual BasicBlock* Next(bool had_change = false) OVERRIDE; private: - const GrowableArray<BasicBlockId>* const loop_ends_; - GrowableArray<std::pair<uint16_t, bool>>* const loop_head_stack_; + const ArenaVector<BasicBlockId>* const loop_ends_; + ArenaVector<std::pair<uint16_t, bool>>* const loop_head_stack_; }; } // namespace art diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc index 4d885fd5ba..af57529a08 100644 --- a/compiler/dex/global_value_numbering.cc +++ b/compiler/dex/global_value_numbering.cc @@ -89,21 +89,20 @@ LocalValueNumbering* GlobalValueNumbering::PrepareBasicBlock(BasicBlock* bb, // the loop head stack will also be empty and there will be nothing to merge anyway. bool use_all_predecessors = true; uint16_t loop_head_idx = 0u; // Used only if !use_all_predecessors. - if (mir_graph_->GetTopologicalSortOrderLoopHeadStack()->Size() != 0) { + if (mir_graph_->GetTopologicalSortOrderLoopHeadStack()->size() != 0) { // Full GVN inside a loop, see if we're at the loop head for the first time. 
- auto top = mir_graph_->GetTopologicalSortOrderLoopHeadStack()->Peek(); + auto top = mir_graph_->GetTopologicalSortOrderLoopHeadStack()->back(); loop_head_idx = top.first; bool recalculating = top.second; use_all_predecessors = recalculating || - loop_head_idx != mir_graph_->GetTopologicalSortOrderIndexes()->Get(bb->id); + loop_head_idx != mir_graph_->GetTopologicalSortOrderIndexes()[bb->id]; } - GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors); - for (BasicBlock* pred_bb = mir_graph_->GetBasicBlock(iter.Next()); - pred_bb != nullptr; pred_bb = mir_graph_->GetBasicBlock(iter.Next())) { - if (lvns_[pred_bb->id] != nullptr && + for (BasicBlockId pred_id : bb->predecessors) { + DCHECK_NE(pred_id, NullBasicBlockId); + if (lvns_[pred_id] != nullptr && (use_all_predecessors || - mir_graph_->GetTopologicalSortOrderIndexes()->Get(pred_bb->id) < loop_head_idx)) { - merge_lvns_.push_back(lvns_[pred_bb->id]); + mir_graph_->GetTopologicalSortOrderIndexes()[pred_id] < loop_head_idx)) { + merge_lvns_.push_back(lvns_[pred_id]); } } // Determine merge type. diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h index 1a38692eea..27183bfe4c 100644 --- a/compiler/dex/global_value_numbering.h +++ b/compiler/dex/global_value_numbering.h @@ -79,7 +79,7 @@ class GlobalValueNumbering { static uint64_t BuildKey(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) { return (static_cast<uint64_t>(op) << 48 | static_cast<uint64_t>(operand1) << 32 | static_cast<uint64_t>(operand2) << 16 | static_cast<uint64_t>(modifier)); - }; + } // Look up a value in the global value map, adding a new entry if there was none before. uint16_t LookupValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) { @@ -93,7 +93,7 @@ class GlobalValueNumbering { global_value_map_.PutBefore(lb, key, res); } return res; - }; + } // Check if the exact value is stored in the global value map. bool HasValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier, @@ -105,7 +105,7 @@ class GlobalValueNumbering { uint64_t key = BuildKey(op, operand1, operand2, modifier); ValueMap::const_iterator it = global_value_map_.find(key); return (it != global_value_map_.end() && it->second == value); - }; + } // FieldReference represents a unique resolved field. struct FieldReference { diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc index 1c0de37ca2..1d9920d24d 100644 --- a/compiler/dex/global_value_numbering_test.cc +++ b/compiler/dex/global_value_numbering_test.cc @@ -129,8 +129,8 @@ class GlobalValueNumberingTest : public testing::Test { { bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } } void DoPrepareIFields(const IFieldDef* defs, size_t count) { - cu_.mir_graph->ifield_lowering_infos_.Reset(); - cu_.mir_graph->ifield_lowering_infos_.Resize(count); + cu_.mir_graph->ifield_lowering_infos_.clear(); + cu_.mir_graph->ifield_lowering_infos_.reserve(count); for (size_t i = 0u; i != count; ++i) { const IFieldDef* def = &defs[i]; MirIFieldLoweringInfo field_info(def->field_idx); @@ -140,7 +140,7 @@ class GlobalValueNumberingTest : public testing::Test { field_info.flags_ = 0u | // Without kFlagIsStatic. (def->is_volatile ? 
MirIFieldLoweringInfo::kFlagIsVolatile : 0u); } - cu_.mir_graph->ifield_lowering_infos_.Insert(field_info); + cu_.mir_graph->ifield_lowering_infos_.push_back(field_info); } } @@ -150,8 +150,8 @@ class GlobalValueNumberingTest : public testing::Test { } void DoPrepareSFields(const SFieldDef* defs, size_t count) { - cu_.mir_graph->sfield_lowering_infos_.Reset(); - cu_.mir_graph->sfield_lowering_infos_.Resize(count); + cu_.mir_graph->sfield_lowering_infos_.clear(); + cu_.mir_graph->sfield_lowering_infos_.reserve(count); for (size_t i = 0u; i != count; ++i) { const SFieldDef* def = &defs[i]; MirSFieldLoweringInfo field_info(def->field_idx); @@ -163,7 +163,7 @@ class GlobalValueNumberingTest : public testing::Test { field_info.declaring_field_idx_ = def->declaring_field_idx; field_info.flags_ |= (def->is_volatile ? MirSFieldLoweringInfo::kFlagIsVolatile : 0u); } - cu_.mir_graph->sfield_lowering_infos_.Insert(field_info); + cu_.mir_graph->sfield_lowering_infos_.push_back(field_info); } } @@ -174,41 +174,33 @@ class GlobalValueNumberingTest : public testing::Test { void DoPrepareBasicBlocks(const BBDef* defs, size_t count) { cu_.mir_graph->block_id_map_.clear(); - cu_.mir_graph->block_list_.Reset(); + cu_.mir_graph->block_list_.clear(); ASSERT_LT(3u, count); // null, entry, exit and at least one bytecode block. ASSERT_EQ(kNullBlock, defs[0].type); ASSERT_EQ(kEntryBlock, defs[1].type); ASSERT_EQ(kExitBlock, defs[2].type); for (size_t i = 0u; i != count; ++i) { const BBDef* def = &defs[i]; - BasicBlock* bb = cu_.mir_graph->NewMemBB(def->type, i); - cu_.mir_graph->block_list_.Insert(bb); + BasicBlock* bb = cu_.mir_graph->CreateNewBB(def->type); if (def->num_successors <= 2) { bb->successor_block_list_type = kNotUsed; - bb->successor_blocks = nullptr; bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u; bb->taken = (def->num_successors >= 2) ? def->successors[1] : 0u; } else { bb->successor_block_list_type = kPackedSwitch; bb->fall_through = 0u; bb->taken = 0u; - bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>( - &cu_.arena, def->num_successors, kGrowableArraySuccessorBlocks); + bb->successor_blocks.reserve(def->num_successors); for (size_t j = 0u; j != def->num_successors; ++j) { SuccessorBlockInfo* successor_block_info = static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor)); successor_block_info->block = j; successor_block_info->key = 0u; // Not used by class init check elimination. 
- bb->successor_blocks->Insert(successor_block_info); + bb->successor_blocks.push_back(successor_block_info); } } - bb->predecessors = new (&cu_.arena) GrowableArray<BasicBlockId>( - &cu_.arena, def->num_predecessors, kGrowableArrayPredecessors); - for (size_t j = 0u; j != def->num_predecessors; ++j) { - ASSERT_NE(0u, def->predecessors[j]); - bb->predecessors->Insert(def->predecessors[j]); - } + bb->predecessors.assign(def->predecessors, def->predecessors + def->num_predecessors); if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) { bb->data_flow_info = static_cast<BasicBlockDataFlow*>( cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo)); @@ -216,10 +208,10 @@ class GlobalValueNumberingTest : public testing::Test { } } cu_.mir_graph->num_blocks_ = count; - ASSERT_EQ(count, cu_.mir_graph->block_list_.Size()); - cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_.Get(1); + ASSERT_EQ(count, cu_.mir_graph->block_list_.size()); + cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1]; ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type); - cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_.Get(2); + cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_[2]; ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type); } @@ -235,24 +227,23 @@ class GlobalValueNumberingTest : public testing::Test { for (size_t i = 0u; i != count; ++i) { const MIRDef* def = &defs[i]; MIR* mir = &mirs_[i]; - ASSERT_LT(def->bbid, cu_.mir_graph->block_list_.Size()); - BasicBlock* bb = cu_.mir_graph->block_list_.Get(def->bbid); + ASSERT_LT(def->bbid, cu_.mir_graph->block_list_.size()); + BasicBlock* bb = cu_.mir_graph->block_list_[def->bbid]; bb->AppendMIR(mir); mir->dalvikInsn.opcode = def->opcode; mir->dalvikInsn.vB = static_cast<int32_t>(def->value); mir->dalvikInsn.vB_wide = def->value; if (def->opcode >= Instruction::IGET && def->opcode <= Instruction::IPUT_SHORT) { - ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.Size()); + ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.size()); mir->meta.ifield_lowering_info = def->field_info; } else if (def->opcode >= Instruction::SGET && def->opcode <= Instruction::SPUT_SHORT) { - ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.Size()); + ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.size()); mir->meta.sfield_lowering_info = def->field_info; } else if (def->opcode == static_cast<Instruction::Code>(kMirOpPhi)) { mir->meta.phi_incoming = static_cast<BasicBlockId*>( allocator_->Alloc(def->num_uses * sizeof(BasicBlockId), kArenaAllocDFInfo)); - for (size_t i = 0; i != def->num_uses; ++i) { - mir->meta.phi_incoming[i] = bb->predecessors->Get(i); - } + ASSERT_EQ(def->num_uses, bb->predecessors.size()); + std::copy(bb->predecessors.begin(), bb->predecessors.end(), mir->meta.phi_incoming); } mir->ssa_rep = &ssa_reps_[i]; mir->ssa_rep->num_uses = def->num_uses; @@ -269,7 +260,7 @@ class GlobalValueNumberingTest : public testing::Test { DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>( cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc)); code_item->insns_size_in_code_units_ = 2u * count; - cu_.mir_graph->current_code_item_ = cu_.code_item = code_item; + cu_.mir_graph->current_code_item_ = code_item; } template <size_t count> @@ -345,11 +336,11 @@ class GlobalValueNumberingTest : public testing::Test { allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack)); // Bind all possible sregs to live vregs for test 
purposes. live_in_v_->SetInitialBits(kMaxSsaRegs); - cu_.mir_graph->ssa_base_vregs_ = new (&cu_.arena) GrowableArray<int>(&cu_.arena, kMaxSsaRegs); - cu_.mir_graph->ssa_subscripts_ = new (&cu_.arena) GrowableArray<int>(&cu_.arena, kMaxSsaRegs); + cu_.mir_graph->ssa_base_vregs_.reserve(kMaxSsaRegs); + cu_.mir_graph->ssa_subscripts_.reserve(kMaxSsaRegs); for (unsigned int i = 0; i < kMaxSsaRegs; i++) { - cu_.mir_graph->ssa_base_vregs_->Insert(i); - cu_.mir_graph->ssa_subscripts_->Insert(0); + cu_.mir_graph->ssa_base_vregs_.push_back(i); + cu_.mir_graph->ssa_subscripts_.push_back(0); } } @@ -438,12 +429,10 @@ GlobalValueNumberingTestCatch::GlobalValueNumberingTestCatch() // Add successor block info to the check block. BasicBlock* check_bb = cu_.mir_graph->GetBasicBlock(3u); check_bb->successor_block_list_type = kCatch; - check_bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>( - &cu_.arena, 2, kGrowableArraySuccessorBlocks); SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*> (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor)); successor_block_info->block = catch_handler->id; - check_bb->successor_blocks->Insert(successor_block_info); + check_bb->successor_blocks.push_back(successor_block_info); } class GlobalValueNumberingTestTwoConsecutiveLoops : public GlobalValueNumberingTest { @@ -2120,12 +2109,10 @@ TEST_F(GlobalValueNumberingTest, NormalPathToCatchEntry) { // Add successor block info to the check block. BasicBlock* check_bb = cu_.mir_graph->GetBasicBlock(3u); check_bb->successor_block_list_type = kCatch; - check_bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>( - &cu_.arena, 2, kGrowableArraySuccessorBlocks); SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*> (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor)); successor_block_info->block = catch_handler->id; - check_bb->successor_blocks->Insert(successor_block_info); + check_bb->successor_blocks.push_back(successor_block_info); BasicBlock* merge_block = cu_.mir_graph->GetBasicBlock(4u); std::swap(merge_block->taken, merge_block->fall_through); PrepareMIRs(mirs); diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h index f6a454bb1d..e11c6e5cb0 100644 --- a/compiler/dex/local_value_numbering.h +++ b/compiler/dex/local_value_numbering.h @@ -122,19 +122,19 @@ class LocalValueNumbering { void SetOperandValue(uint16_t s_reg, uint16_t value) { SetOperandValueImpl(s_reg, value, &sreg_value_map_); - }; + } uint16_t GetOperandValue(int s_reg) const { return GetOperandValueImpl(s_reg, &sreg_value_map_); - }; + } void SetOperandValueWide(uint16_t s_reg, uint16_t value) { SetOperandValueImpl(s_reg, value, &sreg_wide_value_map_); - }; + } uint16_t GetOperandValueWide(int s_reg) const { return GetOperandValueImpl(s_reg, &sreg_wide_value_map_); - }; + } struct RangeCheckKey { uint16_t array; diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc index e4e944e8a8..e53c640fa2 100644 --- a/compiler/dex/local_value_numbering_test.cc +++ b/compiler/dex/local_value_numbering_test.cc @@ -86,8 +86,8 @@ class LocalValueNumberingTest : public testing::Test { { opcode, 0u, 0u, 0, { }, 1, { reg } } // CONST_CLASS, CONST_STRING, NEW_ARRAY, ... 
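Most of the container churn in the tests above and below follows one pattern: the homegrown GrowableArray<T> (Reset/Resize/Insert/Get/Size/Peek/Pop) is replaced by ArenaVector<T>, which is std::vector<T> with an arena-backed allocator. A rough correspondence, sketched with plain std::vector since the arena allocator is omitted here:

// Rough GrowableArray -> vector correspondence used throughout this change.
// Plain std::vector is shown; the real ArenaVector differs only in its allocator.
#include <cstdint>
#include <utility>
#include <vector>

int main() {
  using BasicBlockId = uint16_t;

  // GrowableArray<BasicBlockId>(arena, n)  ->  ArenaVector<BasicBlockId>
  std::vector<BasicBlockId> block_ids;
  block_ids.reserve(4);                      // was: Resize(4), used as a capacity hint
  block_ids.push_back(2u);                   // was: Insert(2u)
  BasicBlockId first = block_ids[0];         // was: Get(0)
  auto n = block_ids.size();                 // was: Size()
  block_ids.clear();                         // was: Reset()

  // The loop head stack in LoopRepeatingTopologicalSortIterator:
  std::vector<std::pair<uint16_t, bool>> loop_head_stack;
  loop_head_stack.push_back(std::make_pair(uint16_t{0u}, false));  // was: Insert(...)
  auto top = loop_head_stack.back();         // was: Peek()
  loop_head_stack.pop_back();                // was: Pop()

  (void)first; (void)n; (void)top;
  return 0;
}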
void DoPrepareIFields(const IFieldDef* defs, size_t count) { - cu_.mir_graph->ifield_lowering_infos_.Reset(); - cu_.mir_graph->ifield_lowering_infos_.Resize(count); + cu_.mir_graph->ifield_lowering_infos_.clear(); + cu_.mir_graph->ifield_lowering_infos_.reserve(count); for (size_t i = 0u; i != count; ++i) { const IFieldDef* def = &defs[i]; MirIFieldLoweringInfo field_info(def->field_idx); @@ -97,7 +97,7 @@ class LocalValueNumberingTest : public testing::Test { field_info.flags_ = 0u | // Without kFlagIsStatic. (def->is_volatile ? MirIFieldLoweringInfo::kFlagIsVolatile : 0u); } - cu_.mir_graph->ifield_lowering_infos_.Insert(field_info); + cu_.mir_graph->ifield_lowering_infos_.push_back(field_info); } } @@ -107,8 +107,8 @@ class LocalValueNumberingTest : public testing::Test { } void DoPrepareSFields(const SFieldDef* defs, size_t count) { - cu_.mir_graph->sfield_lowering_infos_.Reset(); - cu_.mir_graph->sfield_lowering_infos_.Resize(count); + cu_.mir_graph->sfield_lowering_infos_.clear(); + cu_.mir_graph->sfield_lowering_infos_.reserve(count); for (size_t i = 0u; i != count; ++i) { const SFieldDef* def = &defs[i]; MirSFieldLoweringInfo field_info(def->field_idx); @@ -120,7 +120,7 @@ class LocalValueNumberingTest : public testing::Test { field_info.declaring_field_idx_ = def->declaring_field_idx; field_info.flags_ |= (def->is_volatile ? MirSFieldLoweringInfo::kFlagIsVolatile : 0u); } - cu_.mir_graph->sfield_lowering_infos_.Insert(field_info); + cu_.mir_graph->sfield_lowering_infos_.push_back(field_info); } } @@ -140,10 +140,10 @@ class LocalValueNumberingTest : public testing::Test { mir->dalvikInsn.vB = static_cast<int32_t>(def->value); mir->dalvikInsn.vB_wide = def->value; if (def->opcode >= Instruction::IGET && def->opcode <= Instruction::IPUT_SHORT) { - ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.Size()); + ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.size()); mir->meta.ifield_lowering_info = def->field_info; } else if (def->opcode >= Instruction::SGET && def->opcode <= Instruction::SPUT_SHORT) { - ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.Size()); + ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.size()); mir->meta.sfield_lowering_info = def->field_info; } mir->ssa_rep = &ssa_reps_[i]; @@ -170,8 +170,8 @@ class LocalValueNumberingTest : public testing::Test { } void MakeSFieldUninitialized(uint32_t sfield_index) { - CHECK_LT(sfield_index, cu_.mir_graph->sfield_lowering_infos_.Size()); - cu_.mir_graph->sfield_lowering_infos_.GetRawStorage()[sfield_index].flags_ &= + CHECK_LT(sfield_index, cu_.mir_graph->sfield_lowering_infos_.size()); + cu_.mir_graph->sfield_lowering_infos_[sfield_index].flags_ &= ~MirSFieldLoweringInfo::kFlagIsInitialized; } diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc index 9fa5facd19..ee48796996 100644 --- a/compiler/dex/mir_analysis.cc +++ b/compiler/dex/mir_analysis.cc @@ -29,875 +29,911 @@ namespace art { - // Instruction characteristics used to statically identify computation-intensive methods. 
-const uint32_t MIRGraph::analysis_attributes_[kMirOpLast] = { +enum InstructionAnalysisAttributeOps : uint8_t { + kUninterestingOp = 0, + kArithmeticOp, + kFpOp, + kSingleOp, + kDoubleOp, + kIntOp, + kLongOp, + kBranchOp, + kInvokeOp, + kArrayOp, + kHeavyweightOp, + kSimpleConstOp, + kMoveOp, + kSwitch +}; + +enum InstructionAnalysisAttributeMasks : uint16_t { + kAnNone = 1 << kUninterestingOp, + kAnMath = 1 << kArithmeticOp, + kAnFp = 1 << kFpOp, + kAnLong = 1 << kLongOp, + kAnInt = 1 << kIntOp, + kAnSingle = 1 << kSingleOp, + kAnDouble = 1 << kDoubleOp, + kAnFloatMath = 1 << kFpOp, + kAnBranch = 1 << kBranchOp, + kAnInvoke = 1 << kInvokeOp, + kAnArrayOp = 1 << kArrayOp, + kAnHeavyWeight = 1 << kHeavyweightOp, + kAnSimpleConst = 1 << kSimpleConstOp, + kAnMove = 1 << kMoveOp, + kAnSwitch = 1 << kSwitch, + kAnComputational = kAnMath | kAnArrayOp | kAnMove | kAnSimpleConst, +}; + +// Instruction characteristics used to statically identify computation-intensive methods. +static const uint16_t kAnalysisAttributes[kMirOpLast] = { // 00 NOP - AN_NONE, + kAnNone, // 01 MOVE vA, vB - AN_MOVE, + kAnMove, // 02 MOVE_FROM16 vAA, vBBBB - AN_MOVE, + kAnMove, // 03 MOVE_16 vAAAA, vBBBB - AN_MOVE, + kAnMove, // 04 MOVE_WIDE vA, vB - AN_MOVE, + kAnMove, // 05 MOVE_WIDE_FROM16 vAA, vBBBB - AN_MOVE, + kAnMove, // 06 MOVE_WIDE_16 vAAAA, vBBBB - AN_MOVE, + kAnMove, // 07 MOVE_OBJECT vA, vB - AN_MOVE, + kAnMove, // 08 MOVE_OBJECT_FROM16 vAA, vBBBB - AN_MOVE, + kAnMove, // 09 MOVE_OBJECT_16 vAAAA, vBBBB - AN_MOVE, + kAnMove, // 0A MOVE_RESULT vAA - AN_MOVE, + kAnMove, // 0B MOVE_RESULT_WIDE vAA - AN_MOVE, + kAnMove, // 0C MOVE_RESULT_OBJECT vAA - AN_MOVE, + kAnMove, // 0D MOVE_EXCEPTION vAA - AN_MOVE, + kAnMove, // 0E RETURN_VOID - AN_BRANCH, + kAnBranch, // 0F RETURN vAA - AN_BRANCH, + kAnBranch, // 10 RETURN_WIDE vAA - AN_BRANCH, + kAnBranch, // 11 RETURN_OBJECT vAA - AN_BRANCH, + kAnBranch, // 12 CONST_4 vA, #+B - AN_SIMPLECONST, + kAnSimpleConst, // 13 CONST_16 vAA, #+BBBB - AN_SIMPLECONST, + kAnSimpleConst, // 14 CONST vAA, #+BBBBBBBB - AN_SIMPLECONST, + kAnSimpleConst, // 15 CONST_HIGH16 VAA, #+BBBB0000 - AN_SIMPLECONST, + kAnSimpleConst, // 16 CONST_WIDE_16 vAA, #+BBBB - AN_SIMPLECONST, + kAnSimpleConst, // 17 CONST_WIDE_32 vAA, #+BBBBBBBB - AN_SIMPLECONST, + kAnSimpleConst, // 18 CONST_WIDE vAA, #+BBBBBBBBBBBBBBBB - AN_SIMPLECONST, + kAnSimpleConst, // 19 CONST_WIDE_HIGH16 vAA, #+BBBB000000000000 - AN_SIMPLECONST, + kAnSimpleConst, // 1A CONST_STRING vAA, string@BBBB - AN_NONE, + kAnNone, // 1B CONST_STRING_JUMBO vAA, string@BBBBBBBB - AN_NONE, + kAnNone, // 1C CONST_CLASS vAA, type@BBBB - AN_NONE, + kAnNone, // 1D MONITOR_ENTER vAA - AN_NONE, + kAnNone, // 1E MONITOR_EXIT vAA - AN_NONE, + kAnNone, // 1F CHK_CAST vAA, type@BBBB - AN_NONE, + kAnNone, // 20 INSTANCE_OF vA, vB, type@CCCC - AN_NONE, + kAnNone, // 21 ARRAY_LENGTH vA, vB - AN_ARRAYOP, + kAnArrayOp, // 22 NEW_INSTANCE vAA, type@BBBB - AN_HEAVYWEIGHT, + kAnHeavyWeight, // 23 NEW_ARRAY vA, vB, type@CCCC - AN_HEAVYWEIGHT, + kAnHeavyWeight, // 24 FILLED_NEW_ARRAY {vD, vE, vF, vG, vA} - AN_HEAVYWEIGHT, + kAnHeavyWeight, // 25 FILLED_NEW_ARRAY_RANGE {vCCCC .. 
vNNNN}, type@BBBB - AN_HEAVYWEIGHT, + kAnHeavyWeight, // 26 FILL_ARRAY_DATA vAA, +BBBBBBBB - AN_NONE, + kAnNone, // 27 THROW vAA - AN_HEAVYWEIGHT | AN_BRANCH, + kAnHeavyWeight | kAnBranch, // 28 GOTO - AN_BRANCH, + kAnBranch, // 29 GOTO_16 - AN_BRANCH, + kAnBranch, // 2A GOTO_32 - AN_BRANCH, + kAnBranch, // 2B PACKED_SWITCH vAA, +BBBBBBBB - AN_SWITCH, + kAnSwitch, // 2C SPARSE_SWITCH vAA, +BBBBBBBB - AN_SWITCH, + kAnSwitch, // 2D CMPL_FLOAT vAA, vBB, vCC - AN_MATH | AN_FP | AN_SINGLE, + kAnMath | kAnFp | kAnSingle, // 2E CMPG_FLOAT vAA, vBB, vCC - AN_MATH | AN_FP | AN_SINGLE, + kAnMath | kAnFp | kAnSingle, // 2F CMPL_DOUBLE vAA, vBB, vCC - AN_MATH | AN_FP | AN_DOUBLE, + kAnMath | kAnFp | kAnDouble, // 30 CMPG_DOUBLE vAA, vBB, vCC - AN_MATH | AN_FP | AN_DOUBLE, + kAnMath | kAnFp | kAnDouble, // 31 CMP_LONG vAA, vBB, vCC - AN_MATH | AN_LONG, + kAnMath | kAnLong, // 32 IF_EQ vA, vB, +CCCC - AN_MATH | AN_BRANCH | AN_INT, + kAnMath | kAnBranch | kAnInt, // 33 IF_NE vA, vB, +CCCC - AN_MATH | AN_BRANCH | AN_INT, + kAnMath | kAnBranch | kAnInt, // 34 IF_LT vA, vB, +CCCC - AN_MATH | AN_BRANCH | AN_INT, + kAnMath | kAnBranch | kAnInt, // 35 IF_GE vA, vB, +CCCC - AN_MATH | AN_BRANCH | AN_INT, + kAnMath | kAnBranch | kAnInt, // 36 IF_GT vA, vB, +CCCC - AN_MATH | AN_BRANCH | AN_INT, + kAnMath | kAnBranch | kAnInt, // 37 IF_LE vA, vB, +CCCC - AN_MATH | AN_BRANCH | AN_INT, + kAnMath | kAnBranch | kAnInt, // 38 IF_EQZ vAA, +BBBB - AN_MATH | AN_BRANCH | AN_INT, + kAnMath | kAnBranch | kAnInt, // 39 IF_NEZ vAA, +BBBB - AN_MATH | AN_BRANCH | AN_INT, + kAnMath | kAnBranch | kAnInt, // 3A IF_LTZ vAA, +BBBB - AN_MATH | AN_BRANCH | AN_INT, + kAnMath | kAnBranch | kAnInt, // 3B IF_GEZ vAA, +BBBB - AN_MATH | AN_BRANCH | AN_INT, + kAnMath | kAnBranch | kAnInt, // 3C IF_GTZ vAA, +BBBB - AN_MATH | AN_BRANCH | AN_INT, + kAnMath | kAnBranch | kAnInt, // 3D IF_LEZ vAA, +BBBB - AN_MATH | AN_BRANCH | AN_INT, + kAnMath | kAnBranch | kAnInt, // 3E UNUSED_3E - AN_NONE, + kAnNone, // 3F UNUSED_3F - AN_NONE, + kAnNone, // 40 UNUSED_40 - AN_NONE, + kAnNone, // 41 UNUSED_41 - AN_NONE, + kAnNone, // 42 UNUSED_42 - AN_NONE, + kAnNone, // 43 UNUSED_43 - AN_NONE, + kAnNone, // 44 AGET vAA, vBB, vCC - AN_ARRAYOP, + kAnArrayOp, // 45 AGET_WIDE vAA, vBB, vCC - AN_ARRAYOP, + kAnArrayOp, // 46 AGET_OBJECT vAA, vBB, vCC - AN_ARRAYOP, + kAnArrayOp, // 47 AGET_BOOLEAN vAA, vBB, vCC - AN_ARRAYOP, + kAnArrayOp, // 48 AGET_BYTE vAA, vBB, vCC - AN_ARRAYOP, + kAnArrayOp, // 49 AGET_CHAR vAA, vBB, vCC - AN_ARRAYOP, + kAnArrayOp, // 4A AGET_SHORT vAA, vBB, vCC - AN_ARRAYOP, + kAnArrayOp, // 4B APUT vAA, vBB, vCC - AN_ARRAYOP, + kAnArrayOp, // 4C APUT_WIDE vAA, vBB, vCC - AN_ARRAYOP, + kAnArrayOp, // 4D APUT_OBJECT vAA, vBB, vCC - AN_ARRAYOP, + kAnArrayOp, // 4E APUT_BOOLEAN vAA, vBB, vCC - AN_ARRAYOP, + kAnArrayOp, // 4F APUT_BYTE vAA, vBB, vCC - AN_ARRAYOP, + kAnArrayOp, // 50 APUT_CHAR vAA, vBB, vCC - AN_ARRAYOP, + kAnArrayOp, // 51 APUT_SHORT vAA, vBB, vCC - AN_ARRAYOP, + kAnArrayOp, // 52 IGET vA, vB, field@CCCC - AN_NONE, + kAnNone, // 53 IGET_WIDE vA, vB, field@CCCC - AN_NONE, + kAnNone, // 54 IGET_OBJECT vA, vB, field@CCCC - AN_NONE, + kAnNone, // 55 IGET_BOOLEAN vA, vB, field@CCCC - AN_NONE, + kAnNone, // 56 IGET_BYTE vA, vB, field@CCCC - AN_NONE, + kAnNone, // 57 IGET_CHAR vA, vB, field@CCCC - AN_NONE, + kAnNone, // 58 IGET_SHORT vA, vB, field@CCCC - AN_NONE, + kAnNone, // 59 IPUT vA, vB, field@CCCC - AN_NONE, + kAnNone, // 5A IPUT_WIDE vA, vB, field@CCCC - AN_NONE, + kAnNone, // 5B IPUT_OBJECT vA, vB, field@CCCC - AN_NONE, + kAnNone, // 
5C IPUT_BOOLEAN vA, vB, field@CCCC - AN_NONE, + kAnNone, // 5D IPUT_BYTE vA, vB, field@CCCC - AN_NONE, + kAnNone, // 5E IPUT_CHAR vA, vB, field@CCCC - AN_NONE, + kAnNone, // 5F IPUT_SHORT vA, vB, field@CCCC - AN_NONE, + kAnNone, // 60 SGET vAA, field@BBBB - AN_NONE, + kAnNone, // 61 SGET_WIDE vAA, field@BBBB - AN_NONE, + kAnNone, // 62 SGET_OBJECT vAA, field@BBBB - AN_NONE, + kAnNone, // 63 SGET_BOOLEAN vAA, field@BBBB - AN_NONE, + kAnNone, // 64 SGET_BYTE vAA, field@BBBB - AN_NONE, + kAnNone, // 65 SGET_CHAR vAA, field@BBBB - AN_NONE, + kAnNone, // 66 SGET_SHORT vAA, field@BBBB - AN_NONE, + kAnNone, // 67 SPUT vAA, field@BBBB - AN_NONE, + kAnNone, // 68 SPUT_WIDE vAA, field@BBBB - AN_NONE, + kAnNone, // 69 SPUT_OBJECT vAA, field@BBBB - AN_NONE, + kAnNone, // 6A SPUT_BOOLEAN vAA, field@BBBB - AN_NONE, + kAnNone, // 6B SPUT_BYTE vAA, field@BBBB - AN_NONE, + kAnNone, // 6C SPUT_CHAR vAA, field@BBBB - AN_NONE, + kAnNone, // 6D SPUT_SHORT vAA, field@BBBB - AN_NONE, + kAnNone, // 6E INVOKE_VIRTUAL {vD, vE, vF, vG, vA} - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // 6F INVOKE_SUPER {vD, vE, vF, vG, vA} - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // 70 INVOKE_DIRECT {vD, vE, vF, vG, vA} - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // 71 INVOKE_STATIC {vD, vE, vF, vG, vA} - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // 72 INVOKE_INTERFACE {vD, vE, vF, vG, vA} - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // 73 UNUSED_73 - AN_NONE, + kAnNone, // 74 INVOKE_VIRTUAL_RANGE {vCCCC .. vNNNN} - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // 75 INVOKE_SUPER_RANGE {vCCCC .. vNNNN} - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // 76 INVOKE_DIRECT_RANGE {vCCCC .. vNNNN} - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // 77 INVOKE_STATIC_RANGE {vCCCC .. vNNNN} - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // 78 INVOKE_INTERFACE_RANGE {vCCCC .. 
vNNNN} - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // 79 UNUSED_79 - AN_NONE, + kAnNone, // 7A UNUSED_7A - AN_NONE, + kAnNone, // 7B NEG_INT vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // 7C NOT_INT vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // 7D NEG_LONG vA, vB - AN_MATH | AN_LONG, + kAnMath | kAnLong, // 7E NOT_LONG vA, vB - AN_MATH | AN_LONG, + kAnMath | kAnLong, // 7F NEG_FLOAT vA, vB - AN_MATH | AN_FP | AN_SINGLE, + kAnMath | kAnFp | kAnSingle, // 80 NEG_DOUBLE vA, vB - AN_MATH | AN_FP | AN_DOUBLE, + kAnMath | kAnFp | kAnDouble, // 81 INT_TO_LONG vA, vB - AN_MATH | AN_INT | AN_LONG, + kAnMath | kAnInt | kAnLong, // 82 INT_TO_FLOAT vA, vB - AN_MATH | AN_FP | AN_INT | AN_SINGLE, + kAnMath | kAnFp | kAnInt | kAnSingle, // 83 INT_TO_DOUBLE vA, vB - AN_MATH | AN_FP | AN_INT | AN_DOUBLE, + kAnMath | kAnFp | kAnInt | kAnDouble, // 84 LONG_TO_INT vA, vB - AN_MATH | AN_INT | AN_LONG, + kAnMath | kAnInt | kAnLong, // 85 LONG_TO_FLOAT vA, vB - AN_MATH | AN_FP | AN_LONG | AN_SINGLE, + kAnMath | kAnFp | kAnLong | kAnSingle, // 86 LONG_TO_DOUBLE vA, vB - AN_MATH | AN_FP | AN_LONG | AN_DOUBLE, + kAnMath | kAnFp | kAnLong | kAnDouble, // 87 FLOAT_TO_INT vA, vB - AN_MATH | AN_FP | AN_INT | AN_SINGLE, + kAnMath | kAnFp | kAnInt | kAnSingle, // 88 FLOAT_TO_LONG vA, vB - AN_MATH | AN_FP | AN_LONG | AN_SINGLE, + kAnMath | kAnFp | kAnLong | kAnSingle, // 89 FLOAT_TO_DOUBLE vA, vB - AN_MATH | AN_FP | AN_SINGLE | AN_DOUBLE, + kAnMath | kAnFp | kAnSingle | kAnDouble, // 8A DOUBLE_TO_INT vA, vB - AN_MATH | AN_FP | AN_INT | AN_DOUBLE, + kAnMath | kAnFp | kAnInt | kAnDouble, // 8B DOUBLE_TO_LONG vA, vB - AN_MATH | AN_FP | AN_LONG | AN_DOUBLE, + kAnMath | kAnFp | kAnLong | kAnDouble, // 8C DOUBLE_TO_FLOAT vA, vB - AN_MATH | AN_FP | AN_SINGLE | AN_DOUBLE, + kAnMath | kAnFp | kAnSingle | kAnDouble, // 8D INT_TO_BYTE vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // 8E INT_TO_CHAR vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // 8F INT_TO_SHORT vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // 90 ADD_INT vAA, vBB, vCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // 91 SUB_INT vAA, vBB, vCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // 92 MUL_INT vAA, vBB, vCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // 93 DIV_INT vAA, vBB, vCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // 94 REM_INT vAA, vBB, vCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // 95 AND_INT vAA, vBB, vCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // 96 OR_INT vAA, vBB, vCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // 97 XOR_INT vAA, vBB, vCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // 98 SHL_INT vAA, vBB, vCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // 99 SHR_INT vAA, vBB, vCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // 9A USHR_INT vAA, vBB, vCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // 9B ADD_LONG vAA, vBB, vCC - AN_MATH | AN_LONG, + kAnMath | kAnLong, // 9C SUB_LONG vAA, vBB, vCC - AN_MATH | AN_LONG, + kAnMath | kAnLong, // 9D MUL_LONG vAA, vBB, vCC - AN_MATH | AN_LONG, + kAnMath | kAnLong, // 9E DIV_LONG vAA, vBB, vCC - AN_MATH | AN_LONG, + kAnMath | kAnLong, // 9F REM_LONG vAA, vBB, vCC - AN_MATH | AN_LONG, + kAnMath | kAnLong, // A0 AND_LONG vAA, vBB, vCC - AN_MATH | AN_LONG, + kAnMath | kAnLong, // A1 OR_LONG vAA, vBB, vCC - AN_MATH | AN_LONG, + kAnMath | kAnLong, // A2 XOR_LONG vAA, vBB, vCC - AN_MATH | AN_LONG, + kAnMath | kAnLong, // A3 SHL_LONG vAA, vBB, vCC - AN_MATH | AN_LONG, + kAnMath | kAnLong, // A4 SHR_LONG vAA, vBB, vCC - AN_MATH | AN_LONG, + kAnMath | kAnLong, // A5 USHR_LONG vAA, vBB, vCC - AN_MATH | AN_LONG, + kAnMath | kAnLong, 
// A6 ADD_FLOAT vAA, vBB, vCC - AN_MATH | AN_FP | AN_SINGLE, + kAnMath | kAnFp | kAnSingle, // A7 SUB_FLOAT vAA, vBB, vCC - AN_MATH | AN_FP | AN_SINGLE, + kAnMath | kAnFp | kAnSingle, // A8 MUL_FLOAT vAA, vBB, vCC - AN_MATH | AN_FP | AN_SINGLE, + kAnMath | kAnFp | kAnSingle, // A9 DIV_FLOAT vAA, vBB, vCC - AN_MATH | AN_FP | AN_SINGLE, + kAnMath | kAnFp | kAnSingle, // AA REM_FLOAT vAA, vBB, vCC - AN_MATH | AN_FP | AN_SINGLE, + kAnMath | kAnFp | kAnSingle, // AB ADD_DOUBLE vAA, vBB, vCC - AN_MATH | AN_FP | AN_DOUBLE, + kAnMath | kAnFp | kAnDouble, // AC SUB_DOUBLE vAA, vBB, vCC - AN_MATH | AN_FP | AN_DOUBLE, + kAnMath | kAnFp | kAnDouble, // AD MUL_DOUBLE vAA, vBB, vCC - AN_MATH | AN_FP | AN_DOUBLE, + kAnMath | kAnFp | kAnDouble, // AE DIV_DOUBLE vAA, vBB, vCC - AN_MATH | AN_FP | AN_DOUBLE, + kAnMath | kAnFp | kAnDouble, // AF REM_DOUBLE vAA, vBB, vCC - AN_MATH | AN_FP | AN_DOUBLE, + kAnMath | kAnFp | kAnDouble, // B0 ADD_INT_2ADDR vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // B1 SUB_INT_2ADDR vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // B2 MUL_INT_2ADDR vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // B3 DIV_INT_2ADDR vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // B4 REM_INT_2ADDR vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // B5 AND_INT_2ADDR vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // B6 OR_INT_2ADDR vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // B7 XOR_INT_2ADDR vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // B8 SHL_INT_2ADDR vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // B9 SHR_INT_2ADDR vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // BA USHR_INT_2ADDR vA, vB - AN_MATH | AN_INT, + kAnMath | kAnInt, // BB ADD_LONG_2ADDR vA, vB - AN_MATH | AN_LONG, + kAnMath | kAnLong, // BC SUB_LONG_2ADDR vA, vB - AN_MATH | AN_LONG, + kAnMath | kAnLong, // BD MUL_LONG_2ADDR vA, vB - AN_MATH | AN_LONG, + kAnMath | kAnLong, // BE DIV_LONG_2ADDR vA, vB - AN_MATH | AN_LONG, + kAnMath | kAnLong, // BF REM_LONG_2ADDR vA, vB - AN_MATH | AN_LONG, + kAnMath | kAnLong, // C0 AND_LONG_2ADDR vA, vB - AN_MATH | AN_LONG, + kAnMath | kAnLong, // C1 OR_LONG_2ADDR vA, vB - AN_MATH | AN_LONG, + kAnMath | kAnLong, // C2 XOR_LONG_2ADDR vA, vB - AN_MATH | AN_LONG, + kAnMath | kAnLong, // C3 SHL_LONG_2ADDR vA, vB - AN_MATH | AN_LONG, + kAnMath | kAnLong, // C4 SHR_LONG_2ADDR vA, vB - AN_MATH | AN_LONG, + kAnMath | kAnLong, // C5 USHR_LONG_2ADDR vA, vB - AN_MATH | AN_LONG, + kAnMath | kAnLong, // C6 ADD_FLOAT_2ADDR vA, vB - AN_MATH | AN_FP | AN_SINGLE, + kAnMath | kAnFp | kAnSingle, // C7 SUB_FLOAT_2ADDR vA, vB - AN_MATH | AN_FP | AN_SINGLE, + kAnMath | kAnFp | kAnSingle, // C8 MUL_FLOAT_2ADDR vA, vB - AN_MATH | AN_FP | AN_SINGLE, + kAnMath | kAnFp | kAnSingle, // C9 DIV_FLOAT_2ADDR vA, vB - AN_MATH | AN_FP | AN_SINGLE, + kAnMath | kAnFp | kAnSingle, // CA REM_FLOAT_2ADDR vA, vB - AN_MATH | AN_FP | AN_SINGLE, + kAnMath | kAnFp | kAnSingle, // CB ADD_DOUBLE_2ADDR vA, vB - AN_MATH | AN_FP | AN_DOUBLE, + kAnMath | kAnFp | kAnDouble, // CC SUB_DOUBLE_2ADDR vA, vB - AN_MATH | AN_FP | AN_DOUBLE, + kAnMath | kAnFp | kAnDouble, // CD MUL_DOUBLE_2ADDR vA, vB - AN_MATH | AN_FP | AN_DOUBLE, + kAnMath | kAnFp | kAnDouble, // CE DIV_DOUBLE_2ADDR vA, vB - AN_MATH | AN_FP | AN_DOUBLE, + kAnMath | kAnFp | kAnDouble, // CF REM_DOUBLE_2ADDR vA, vB - AN_MATH | AN_FP | AN_DOUBLE, + kAnMath | kAnFp | kAnDouble, // D0 ADD_INT_LIT16 vA, vB, #+CCCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // D1 RSUB_INT vA, vB, #+CCCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // D2 MUL_INT_LIT16 vA, vB, #+CCCC - AN_MATH | AN_INT, + 
kAnMath | kAnInt, // D3 DIV_INT_LIT16 vA, vB, #+CCCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // D4 REM_INT_LIT16 vA, vB, #+CCCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // D5 AND_INT_LIT16 vA, vB, #+CCCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // D6 OR_INT_LIT16 vA, vB, #+CCCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // D7 XOR_INT_LIT16 vA, vB, #+CCCC - AN_MATH | AN_INT, + kAnMath | kAnInt, // D8 ADD_INT_LIT8 vAA, vBB, #+CC - AN_MATH | AN_INT, + kAnMath | kAnInt, // D9 RSUB_INT_LIT8 vAA, vBB, #+CC - AN_MATH | AN_INT, + kAnMath | kAnInt, // DA MUL_INT_LIT8 vAA, vBB, #+CC - AN_MATH | AN_INT, + kAnMath | kAnInt, // DB DIV_INT_LIT8 vAA, vBB, #+CC - AN_MATH | AN_INT, + kAnMath | kAnInt, // DC REM_INT_LIT8 vAA, vBB, #+CC - AN_MATH | AN_INT, + kAnMath | kAnInt, // DD AND_INT_LIT8 vAA, vBB, #+CC - AN_MATH | AN_INT, + kAnMath | kAnInt, // DE OR_INT_LIT8 vAA, vBB, #+CC - AN_MATH | AN_INT, + kAnMath | kAnInt, // DF XOR_INT_LIT8 vAA, vBB, #+CC - AN_MATH | AN_INT, + kAnMath | kAnInt, // E0 SHL_INT_LIT8 vAA, vBB, #+CC - AN_MATH | AN_INT, + kAnMath | kAnInt, // E1 SHR_INT_LIT8 vAA, vBB, #+CC - AN_MATH | AN_INT, + kAnMath | kAnInt, // E2 USHR_INT_LIT8 vAA, vBB, #+CC - AN_MATH | AN_INT, + kAnMath | kAnInt, // E3 IGET_VOLATILE - AN_NONE, + kAnNone, // E4 IPUT_VOLATILE - AN_NONE, + kAnNone, // E5 SGET_VOLATILE - AN_NONE, + kAnNone, // E6 SPUT_VOLATILE - AN_NONE, + kAnNone, // E7 IGET_OBJECT_VOLATILE - AN_NONE, + kAnNone, // E8 IGET_WIDE_VOLATILE - AN_NONE, + kAnNone, // E9 IPUT_WIDE_VOLATILE - AN_NONE, + kAnNone, // EA SGET_WIDE_VOLATILE - AN_NONE, + kAnNone, // EB SPUT_WIDE_VOLATILE - AN_NONE, + kAnNone, // EC BREAKPOINT - AN_NONE, + kAnNone, // ED THROW_VERIFICATION_ERROR - AN_HEAVYWEIGHT | AN_BRANCH, + kAnHeavyWeight | kAnBranch, // EE EXECUTE_INLINE - AN_NONE, + kAnNone, // EF EXECUTE_INLINE_RANGE - AN_NONE, + kAnNone, // F0 INVOKE_OBJECT_INIT_RANGE - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // F1 RETURN_VOID_BARRIER - AN_BRANCH, + kAnBranch, // F2 IGET_QUICK - AN_NONE, + kAnNone, // F3 IGET_WIDE_QUICK - AN_NONE, + kAnNone, // F4 IGET_OBJECT_QUICK - AN_NONE, + kAnNone, // F5 IPUT_QUICK - AN_NONE, + kAnNone, // F6 IPUT_WIDE_QUICK - AN_NONE, + kAnNone, // F7 IPUT_OBJECT_QUICK - AN_NONE, + kAnNone, // F8 INVOKE_VIRTUAL_QUICK - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // F9 INVOKE_VIRTUAL_QUICK_RANGE - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // FA INVOKE_SUPER_QUICK - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // FB INVOKE_SUPER_QUICK_RANGE - AN_INVOKE | AN_HEAVYWEIGHT, + kAnInvoke | kAnHeavyWeight, // FC IPUT_OBJECT_VOLATILE - AN_NONE, + kAnNone, // FD SGET_OBJECT_VOLATILE - AN_NONE, + kAnNone, // FE SPUT_OBJECT_VOLATILE - AN_NONE, + kAnNone, // FF UNUSED_FF - AN_NONE, + kAnNone, // Beginning of extended MIR opcodes // 100 MIR_PHI - AN_NONE, + kAnNone, // 101 MIR_COPY - AN_NONE, + kAnNone, // 102 MIR_FUSED_CMPL_FLOAT - AN_NONE, + kAnNone, // 103 MIR_FUSED_CMPG_FLOAT - AN_NONE, + kAnNone, // 104 MIR_FUSED_CMPL_DOUBLE - AN_NONE, + kAnNone, // 105 MIR_FUSED_CMPG_DOUBLE - AN_NONE, + kAnNone, // 106 MIR_FUSED_CMP_LONG - AN_NONE, + kAnNone, // 107 MIR_NOP - AN_NONE, + kAnNone, // 108 MIR_NULL_CHECK - AN_NONE, + kAnNone, // 109 MIR_RANGE_CHECK - AN_NONE, + kAnNone, // 10A MIR_DIV_ZERO_CHECK - AN_NONE, + kAnNone, // 10B MIR_CHECK - AN_NONE, + kAnNone, // 10C MIR_CHECKPART2 - AN_NONE, + kAnNone, // 10D MIR_SELECT - AN_NONE, + kAnNone, // 10E MirOpConstVector - AN_NONE, + kAnNone, // 10F MirOpMoveVector - AN_NONE, + kAnNone, // 110 MirOpPackedMultiply - 
AN_NONE, + kAnNone, // 111 MirOpPackedAddition - AN_NONE, + kAnNone, // 112 MirOpPackedSubtract - AN_NONE, + kAnNone, // 113 MirOpPackedShiftLeft - AN_NONE, + kAnNone, // 114 MirOpPackedSignedShiftRight - AN_NONE, + kAnNone, // 115 MirOpPackedUnsignedShiftRight - AN_NONE, + kAnNone, // 116 MirOpPackedAnd - AN_NONE, + kAnNone, // 117 MirOpPackedOr - AN_NONE, + kAnNone, // 118 MirOpPackedXor - AN_NONE, + kAnNone, // 119 MirOpPackedAddReduce - AN_NONE, + kAnNone, // 11A MirOpPackedReduce - AN_NONE, + kAnNone, // 11B MirOpPackedSet - AN_NONE, + kAnNone, // 11C MirOpReserveVectorRegisters - AN_NONE, + kAnNone, // 11D MirOpReturnVectorRegisters - AN_NONE, + kAnNone, // 11E MirOpMemBarrier - AN_NONE, + kAnNone, // 11F MirOpPackedArrayGet - AN_ARRAYOP, + kAnArrayOp, // 120 MirOpPackedArrayPut - AN_ARRAYOP, + kAnArrayOp, }; struct MethodStats { @@ -929,10 +965,10 @@ void MIRGraph::AnalyzeBlock(BasicBlock* bb, MethodStats* stats) { */ BasicBlock* ending_bb = bb; if (ending_bb->last_mir_insn != NULL) { - uint32_t ending_flags = analysis_attributes_[ending_bb->last_mir_insn->dalvikInsn.opcode]; - while ((ending_flags & AN_BRANCH) == 0) { + uint32_t ending_flags = kAnalysisAttributes[ending_bb->last_mir_insn->dalvikInsn.opcode]; + while ((ending_flags & kAnBranch) == 0) { ending_bb = GetBasicBlock(ending_bb->fall_through); - ending_flags = analysis_attributes_[ending_bb->last_mir_insn->dalvikInsn.opcode]; + ending_flags = kAnalysisAttributes[ending_bb->last_mir_insn->dalvikInsn.opcode]; } } /* @@ -963,27 +999,27 @@ void MIRGraph::AnalyzeBlock(BasicBlock* bb, MethodStats* stats) { // Skip any MIR pseudo-op. continue; } - uint32_t flags = analysis_attributes_[mir->dalvikInsn.opcode]; + uint16_t flags = kAnalysisAttributes[mir->dalvikInsn.opcode]; stats->dex_instructions += loop_scale_factor; - if ((flags & AN_BRANCH) == 0) { - computational_block &= ((flags & AN_COMPUTATIONAL) != 0); + if ((flags & kAnBranch) == 0) { + computational_block &= ((flags & kAnComputational) != 0); } else { stats->branch_ops += loop_scale_factor; } - if ((flags & AN_MATH) != 0) { + if ((flags & kAnMath) != 0) { stats->math_ops += loop_scale_factor; has_math = true; } - if ((flags & AN_FP) != 0) { + if ((flags & kAnFp) != 0) { stats->fp_ops += loop_scale_factor; } - if ((flags & AN_ARRAYOP) != 0) { + if ((flags & kAnArrayOp) != 0) { stats->array_ops += loop_scale_factor; } - if ((flags & AN_HEAVYWEIGHT) != 0) { + if ((flags & kAnHeavyWeight) != 0) { stats->heavyweight_ops += loop_scale_factor; } - if ((flags & AN_SWITCH) != 0) { + if ((flags & kAnSwitch) != 0) { stats->has_switch = true; } } @@ -1166,7 +1202,7 @@ bool MIRGraph::SkipCompilation(std::string* skip_message) { void MIRGraph::DoCacheFieldLoweringInfo() { // All IGET/IPUT/SGET/SPUT instructions take 2 code units and there must also be a RETURN. - const uint32_t max_refs = (current_code_item_->insns_size_in_code_units_ - 1u) / 2u; + const uint32_t max_refs = (GetNumDalvikInsns() - 1u) / 2u; ScopedArenaAllocator allocator(&cu_->arena_stack); uint16_t* field_idxs = reinterpret_cast<uint16_t*>(allocator.Alloc(max_refs * sizeof(uint16_t), kArenaAllocMisc)); @@ -1182,12 +1218,11 @@ void MIRGraph::DoCacheFieldLoweringInfo() { for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { if (mir->dalvikInsn.opcode >= Instruction::IGET && mir->dalvikInsn.opcode <= Instruction::SPUT_SHORT) { - const Instruction* insn = Instruction::At(current_code_item_->insns_ + mir->offset); // Get field index and try to find it among existing indexes. 
If found, it's usually among // the last few added, so we'll start the search from ifield_pos/sfield_pos. Though this // is a linear search, it actually performs much better than map based approach. if (mir->dalvikInsn.opcode <= Instruction::IPUT_SHORT) { - uint16_t field_idx = insn->VRegC_22c(); + uint16_t field_idx = mir->dalvikInsn.vC; size_t i = ifield_pos; while (i != 0u && field_idxs[i - 1] != field_idx) { --i; @@ -1199,7 +1234,7 @@ void MIRGraph::DoCacheFieldLoweringInfo() { field_idxs[ifield_pos++] = field_idx; } } else { - uint16_t field_idx = insn->VRegB_21c(); + uint16_t field_idx = mir->dalvikInsn.vB; size_t i = sfield_pos; while (i != max_refs && field_idxs[i] != field_idx) { ++i; @@ -1218,25 +1253,25 @@ void MIRGraph::DoCacheFieldLoweringInfo() { if (ifield_pos != 0u) { // Resolve instance field infos. - DCHECK_EQ(ifield_lowering_infos_.Size(), 0u); - ifield_lowering_infos_.Resize(ifield_pos); + DCHECK_EQ(ifield_lowering_infos_.size(), 0u); + ifield_lowering_infos_.reserve(ifield_pos); for (size_t pos = 0u; pos != ifield_pos; ++pos) { - ifield_lowering_infos_.Insert(MirIFieldLoweringInfo(field_idxs[pos])); + ifield_lowering_infos_.push_back(MirIFieldLoweringInfo(field_idxs[pos])); } MirIFieldLoweringInfo::Resolve(cu_->compiler_driver, GetCurrentDexCompilationUnit(), - ifield_lowering_infos_.GetRawStorage(), ifield_pos); + ifield_lowering_infos_.data(), ifield_pos); } if (sfield_pos != max_refs) { // Resolve static field infos. - DCHECK_EQ(sfield_lowering_infos_.Size(), 0u); - sfield_lowering_infos_.Resize(max_refs - sfield_pos); + DCHECK_EQ(sfield_lowering_infos_.size(), 0u); + sfield_lowering_infos_.reserve(max_refs - sfield_pos); for (size_t pos = max_refs; pos != sfield_pos;) { --pos; - sfield_lowering_infos_.Insert(MirSFieldLoweringInfo(field_idxs[pos])); + sfield_lowering_infos_.push_back(MirSFieldLoweringInfo(field_idxs[pos])); } MirSFieldLoweringInfo::Resolve(cu_->compiler_driver, GetCurrentDexCompilationUnit(), - sfield_lowering_infos_.GetRawStorage(), max_refs - sfield_pos); + sfield_lowering_infos_.data(), max_refs - sfield_pos); } } @@ -1279,7 +1314,7 @@ void MIRGraph::DoCacheMethodLoweringInfo() { ScopedArenaAllocator allocator(&cu_->arena_stack); // All INVOKE instructions take 3 code units and there must also be a RETURN. - uint32_t max_refs = (current_code_item_->insns_size_in_code_units_ - 1u) / 3u; + uint32_t max_refs = (GetNumDalvikInsns() - 1u) / 3u; // Map invoke key (see MapEntry) to lowering info index and vice versa. // The invoke_map and sequential entries are essentially equivalent to Boost.MultiIndex's @@ -1300,14 +1335,13 @@ void MIRGraph::DoCacheMethodLoweringInfo() { mir->dalvikInsn.opcode <= Instruction::INVOKE_INTERFACE_RANGE && mir->dalvikInsn.opcode != Instruction::RETURN_VOID_BARRIER) { // Decode target method index and invoke type. - const Instruction* insn = Instruction::At(current_code_item_->insns_ + mir->offset); uint16_t target_method_idx; uint16_t invoke_type_idx; if (mir->dalvikInsn.opcode <= Instruction::INVOKE_INTERFACE) { - target_method_idx = insn->VRegB_35c(); + target_method_idx = mir->dalvikInsn.vB; invoke_type_idx = mir->dalvikInsn.opcode - Instruction::INVOKE_VIRTUAL; } else { - target_method_idx = insn->VRegB_3rc(); + target_method_idx = mir->dalvikInsn.vB; invoke_type_idx = mir->dalvikInsn.opcode - Instruction::INVOKE_VIRTUAL_RANGE; } @@ -1338,9 +1372,9 @@ void MIRGraph::DoCacheMethodLoweringInfo() { } // Prepare unique method infos, set method info indexes for their MIRs. 
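The attribute-table rewrite above is purely a rename from the AN_* macros to kAn* enumerators; the consumers in AnalyzeBlock still treat each table entry as a small bit set and test individual bits to accumulate per-method statistics. Below is a minimal sketch of that pattern with made-up bit positions and only two counters, not the actual ART definitions:

```cpp
#include <cstdint>

// Simplified stand-ins for the analysis attribute bits; the real table maps
// every Dalvik opcode (plus the extended MIR opcodes) to such a bit set.
enum : uint16_t {
  kAnNone   = 1 << 0,
  kAnMath   = 1 << 1,
  kAnFp     = 1 << 2,
  kAnBranch = 1 << 3,
  kAnInt    = 1 << 4,
};

struct Stats {
  uint32_t math_ops = 0;
  uint32_t branch_ops = 0;
};

// Accumulate statistics for one instruction, weighted by loop nesting depth.
void Accumulate(uint16_t flags, uint32_t loop_scale, Stats* stats) {
  if ((flags & kAnMath) != 0) {
    stats->math_ops += loop_scale;
  }
  if ((flags & kAnBranch) != 0) {
    stats->branch_ops += loop_scale;
  }
}
```

The real kAnalysisAttributes table carries one such bit set per opcode, which is why the rename touches every row of the table above.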
- DCHECK_EQ(method_lowering_infos_.Size(), 0u); + DCHECK_EQ(method_lowering_infos_.size(), 0u); const size_t count = invoke_map.size(); - method_lowering_infos_.Resize(count); + method_lowering_infos_.reserve(count); for (size_t pos = 0u; pos != count; ++pos) { const MapEntry* entry = sequential_entries[pos]; MirMethodLoweringInfo method_info(entry->target_method_idx, @@ -1348,10 +1382,10 @@ void MIRGraph::DoCacheMethodLoweringInfo() { if (entry->devirt_target != nullptr) { method_info.SetDevirtualizationTarget(*entry->devirt_target); } - method_lowering_infos_.Insert(method_info); + method_lowering_infos_.push_back(method_info); } MirMethodLoweringInfo::Resolve(cu_->compiler_driver, GetCurrentDexCompilationUnit(), - method_lowering_infos_.GetRawStorage(), count); + method_lowering_infos_.data(), count); } bool MIRGraph::SkipCompilationByName(const std::string& methodname) { diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index d9531fb293..246ae44d14 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -901,7 +901,7 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { /* Return the base virtual register for a SSA name */ int MIRGraph::SRegToVReg(int ssa_reg) const { - return ssa_base_vregs_->Get(ssa_reg); + return ssa_base_vregs_[ssa_reg]; } /* Any register that is used before being defined is considered live-in */ @@ -1025,14 +1025,14 @@ int MIRGraph::AddNewSReg(int v_reg) { int subscript = ++ssa_last_defs_[v_reg]; uint32_t ssa_reg = GetNumSSARegs(); SetNumSSARegs(ssa_reg + 1); - ssa_base_vregs_->Insert(v_reg); - ssa_subscripts_->Insert(subscript); - DCHECK_EQ(ssa_base_vregs_->Size(), ssa_subscripts_->Size()); + ssa_base_vregs_.push_back(v_reg); + ssa_subscripts_.push_back(subscript); + DCHECK_EQ(ssa_base_vregs_.size(), ssa_subscripts_.size()); // If we are expanding very late, update use counts too. - if (ssa_reg > 0 && use_counts_.Size() == ssa_reg) { + if (ssa_reg > 0 && use_counts_.size() == ssa_reg) { // Need to expand the counts. - use_counts_.Insert(0); - raw_use_counts_.Insert(0); + use_counts_.push_back(0); + raw_use_counts_.push_back(0); } return ssa_reg; } @@ -1272,14 +1272,31 @@ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { return true; } +void MIRGraph::InitializeBasicBlockDataFlow() { + /* + * Allocate the BasicBlockDataFlow structure for the entry and code blocks. + */ + for (BasicBlock* bb : block_list_) { + if (bb->hidden == true) continue; + if (bb->block_type == kDalvikByteCode || + bb->block_type == kEntryBlock || + bb->block_type == kExitBlock) { + bb->data_flow_info = + static_cast<BasicBlockDataFlow*>(arena_->Alloc(sizeof(BasicBlockDataFlow), + kArenaAllocDFInfo)); + } + } +} + /* Setup the basic data structures for SSA conversion */ void MIRGraph::CompilerInitializeSSAConversion() { size_t num_reg = GetNumOfCodeAndTempVRs(); - ssa_base_vregs_ = new (arena_) GrowableArray<int>(arena_, num_reg + GetDefCount() + 128, - kGrowableArraySSAtoDalvikMap); - ssa_subscripts_ = new (arena_) GrowableArray<int>(arena_, num_reg + GetDefCount() + 128, - kGrowableArraySSAtoDalvikMap); + ssa_base_vregs_.clear(); + ssa_base_vregs_.reserve(num_reg + GetDefCount() + 128); + ssa_subscripts_.clear(); + ssa_subscripts_.reserve(num_reg + GetDefCount() + 128); + /* * Initial number of SSA registers is equal to the number of Dalvik * registers. 
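CompilerInitializeSSAConversion now clears and reserves the two arena vectors instead of allocating GrowableArrays, then seeds them so that SSA name i initially refers to Dalvik vreg i with subscript 0. A sketch of that seeding using plain std::vector in place of ArenaVector (the arena adapter is omitted and the patch's fixed +128 headroom is folded into expected_defs):

```cpp
#include <cstddef>
#include <vector>

// Seed the SSA name tables so that, before any renaming, SSA name i simply
// refers to Dalvik vreg i with subscript 0 (i.e. "(0 << 16) | i").
void SeedSsaTables(std::vector<int>& base_vregs, std::vector<int>& subscripts,
                   std::size_t num_vregs, std::size_t expected_defs) {
  base_vregs.clear();
  subscripts.clear();
  base_vregs.reserve(num_vregs + expected_defs);   // avoid regrowth during renaming
  subscripts.reserve(num_vregs + expected_defs);
  for (std::size_t i = 0; i < num_vregs; ++i) {
    base_vregs.push_back(static_cast<int>(i));
    subscripts.push_back(0);
  }
}
```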
@@ -1292,8 +1309,8 @@ void MIRGraph::CompilerInitializeSSAConversion() { * into "(0 << 16) | i" */ for (unsigned int i = 0; i < num_reg; i++) { - ssa_base_vregs_->Insert(i); - ssa_subscripts_->Insert(0); + ssa_base_vregs_.push_back(i); + ssa_subscripts_.push_back(0); } /* @@ -1318,73 +1335,7 @@ void MIRGraph::CompilerInitializeSSAConversion() { // The MIR graph keeps track of the sreg for method pointer specially, so record that now. method_sreg_ = method_temp->s_reg_low; - /* - * Allocate the BasicBlockDataFlow structure for the entry and code blocks - */ - GrowableArray<BasicBlock*>::Iterator iterator(&block_list_); - - while (true) { - BasicBlock* bb = iterator.Next(); - if (bb == NULL) break; - if (bb->hidden == true) continue; - if (bb->block_type == kDalvikByteCode || - bb->block_type == kEntryBlock || - bb->block_type == kExitBlock) { - bb->data_flow_info = - static_cast<BasicBlockDataFlow*>(arena_->Alloc(sizeof(BasicBlockDataFlow), - kArenaAllocDFInfo)); - } - } -} - -/* - * This function will make a best guess at whether the invoke will - * end up using Method*. It isn't critical to get it exactly right, - * and attempting to do would involve more complexity than it's - * worth. - */ -bool MIRGraph::InvokeUsesMethodStar(MIR* mir) { - InvokeType type; - Instruction::Code opcode = mir->dalvikInsn.opcode; - switch (opcode) { - case Instruction::INVOKE_STATIC: - case Instruction::INVOKE_STATIC_RANGE: - type = kStatic; - break; - case Instruction::INVOKE_DIRECT: - case Instruction::INVOKE_DIRECT_RANGE: - type = kDirect; - break; - case Instruction::INVOKE_VIRTUAL: - case Instruction::INVOKE_VIRTUAL_RANGE: - type = kVirtual; - break; - case Instruction::INVOKE_INTERFACE: - case Instruction::INVOKE_INTERFACE_RANGE: - return false; - case Instruction::INVOKE_SUPER_RANGE: - case Instruction::INVOKE_SUPER: - type = kSuper; - break; - default: - LOG(WARNING) << "Unexpected invoke op: " << opcode; - return false; - } - DexCompilationUnit m_unit(cu_); - MethodReference target_method(cu_->dex_file, mir->dalvikInsn.vB); - int vtable_idx; - uintptr_t direct_code; - uintptr_t direct_method; - uint32_t current_offset = static_cast<uint32_t>(current_offset_); - bool fast_path = - cu_->compiler_driver->ComputeInvokeInfo(&m_unit, current_offset, - false, true, - &type, &target_method, - &vtable_idx, - &direct_code, &direct_method) && - !(cu_->enable_debug & (1 << kDebugSlowInvokePath)); - return (((type == kDirect) || (type == kStatic)) && - fast_path && ((direct_code == 0) || (direct_method == 0))); + InitializeBasicBlockDataFlow(); } /* @@ -1405,8 +1356,8 @@ void MIRGraph::CountUses(struct BasicBlock* bb) { } for (int i = 0; i < mir->ssa_rep->num_uses; i++) { int s_reg = mir->ssa_rep->uses[i]; - raw_use_counts_.Increment(s_reg); - use_counts_.Put(s_reg, use_counts_.Get(s_reg) + weight); + raw_use_counts_[s_reg] += 1u; + use_counts_[s_reg] += weight; } if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) { uint64_t df_attributes = GetDataFlowAttributes(mir); @@ -1421,8 +1372,8 @@ void MIRGraph::CountUses(struct BasicBlock* bb) { * and save results for both here and GenInvoke. For now, go ahead * and assume all invokes use method*. 
*/ - raw_use_counts_.Increment(method_sreg_); - use_counts_.Put(method_sreg_, use_counts_.Get(method_sreg_) + weight); + raw_use_counts_[method_sreg_] += 1u; + use_counts_[method_sreg_] += weight; } } } @@ -1430,21 +1381,16 @@ void MIRGraph::CountUses(struct BasicBlock* bb) { /* Verify if all the successor is connected with all the claimed predecessors */ bool MIRGraph::VerifyPredInfo(BasicBlock* bb) { - GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors); - - while (true) { - BasicBlock* pred_bb = GetBasicBlock(iter.Next()); - if (!pred_bb) break; + for (BasicBlockId pred_id : bb->predecessors) { + BasicBlock* pred_bb = GetBasicBlock(pred_id); + DCHECK(pred_bb != nullptr); bool found = false; if (pred_bb->taken == bb->id) { found = true; } else if (pred_bb->fall_through == bb->id) { found = true; } else if (pred_bb->successor_block_list_type != kNotUsed) { - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(pred_bb->successor_blocks); - while (true) { - SuccessorBlockInfo *successor_block_info = iterator.Next(); - if (successor_block_info == NULL) break; + for (SuccessorBlockInfo* successor_block_info : pred_bb->successor_blocks) { BasicBlockId succ_bb = successor_block_info->block; if (succ_bb == bb->id) { found = true; diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 93b774c67b..dda9e7781e 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -76,23 +76,23 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) : reg_location_(NULL), block_id_map_(std::less<unsigned int>(), arena->Adapter()), cu_(cu), - ssa_base_vregs_(NULL), - ssa_subscripts_(NULL), + ssa_base_vregs_(arena->Adapter(kArenaAllocSSAToDalvikMap)), + ssa_subscripts_(arena->Adapter(kArenaAllocSSAToDalvikMap)), vreg_to_ssa_map_(NULL), ssa_last_defs_(NULL), is_constant_v_(NULL), constant_values_(NULL), - use_counts_(arena, 256, kGrowableArrayMisc), - raw_use_counts_(arena, 256, kGrowableArrayMisc), + use_counts_(arena->Adapter()), + raw_use_counts_(arena->Adapter()), num_reachable_blocks_(0), max_num_reachable_blocks_(0), - dfs_order_(NULL), - dfs_post_order_(NULL), - dom_post_order_traversal_(NULL), - topological_order_(nullptr), - topological_order_loop_ends_(nullptr), - topological_order_indexes_(nullptr), - topological_order_loop_head_stack_(nullptr), + dfs_order_(arena->Adapter(kArenaAllocDfsPreOrder)), + dfs_post_order_(arena->Adapter(kArenaAllocDfsPostOrder)), + dom_post_order_traversal_(arena->Adapter(kArenaAllocDomPostOrder)), + topological_order_(arena->Adapter(kArenaAllocTopologicalSortOrder)), + topological_order_loop_ends_(arena->Adapter(kArenaAllocTopologicalSortOrder)), + topological_order_indexes_(arena->Adapter(kArenaAllocTopologicalSortOrder)), + topological_order_loop_head_stack_(arena->Adapter(kArenaAllocTopologicalSortOrder)), i_dom_list_(NULL), def_block_matrix_(NULL), temp_scoped_alloc_(), @@ -100,13 +100,13 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) temp_bit_vector_size_(0u), temp_bit_vector_(nullptr), temp_gvn_(), - block_list_(arena, 100, kGrowableArrayBlockList), + block_list_(arena->Adapter(kArenaAllocBBList)), try_block_addr_(NULL), entry_block_(NULL), exit_block_(NULL), num_blocks_(0), current_code_item_(NULL), - dex_pc_to_block_map_(arena, 0, kGrowableArrayMisc), + dex_pc_to_block_map_(arena->Adapter()), m_units_(arena->Adapter()), method_stack_(arena->Adapter()), current_method_(kInvalidEntry), @@ -127,10 +127,13 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) 
compiler_temps_committed_(false), punt_to_interpreter_(false), merged_df_flags_(0u), - ifield_lowering_infos_(arena, 0u), - sfield_lowering_infos_(arena, 0u), - method_lowering_infos_(arena, 0u), - gen_suspend_test_list_(arena, 0u) { + ifield_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)), + sfield_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)), + method_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)), + gen_suspend_test_list_(arena->Adapter()) { + use_counts_.reserve(256); + raw_use_counts_.reserve(256); + block_list_.reserve(100); try_block_addr_ = new (arena_) ArenaBitVector(arena_, 0, true /* expandable */); @@ -149,6 +152,7 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) } MIRGraph::~MIRGraph() { + STLDeleteElements(&block_list_); STLDeleteElements(&m_units_); } @@ -183,8 +187,7 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, if (insn == NULL) { LOG(FATAL) << "Break split failed"; } - BasicBlock* bottom_block = NewMemBB(kDalvikByteCode, num_blocks_++); - block_list_.Insert(bottom_block); + BasicBlock* bottom_block = CreateNewBB(kDalvikByteCode); bottom_block->start_offset = code_offset; bottom_block->first_mir_insn = insn; @@ -207,34 +210,31 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, if (bottom_block->taken != NullBasicBlockId) { orig_block->taken = NullBasicBlockId; BasicBlock* bb_taken = GetBasicBlock(bottom_block->taken); - bb_taken->predecessors->Delete(orig_block->id); - bb_taken->predecessors->Insert(bottom_block->id); + bb_taken->ErasePredecessor(orig_block->id); + bb_taken->predecessors.push_back(bottom_block->id); } /* Handle the fallthrough path */ bottom_block->fall_through = orig_block->fall_through; orig_block->fall_through = bottom_block->id; - bottom_block->predecessors->Insert(orig_block->id); + bottom_block->predecessors.push_back(orig_block->id); if (bottom_block->fall_through != NullBasicBlockId) { BasicBlock* bb_fall_through = GetBasicBlock(bottom_block->fall_through); - bb_fall_through->predecessors->Delete(orig_block->id); - bb_fall_through->predecessors->Insert(bottom_block->id); + bb_fall_through->ErasePredecessor(orig_block->id); + bb_fall_through->predecessors.push_back(bottom_block->id); } /* Handle the successor list */ if (orig_block->successor_block_list_type != kNotUsed) { bottom_block->successor_block_list_type = orig_block->successor_block_list_type; - bottom_block->successor_blocks = orig_block->successor_blocks; + bottom_block->successor_blocks.swap(orig_block->successor_blocks); orig_block->successor_block_list_type = kNotUsed; - orig_block->successor_blocks = nullptr; - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bottom_block->successor_blocks); - while (true) { - SuccessorBlockInfo* successor_block_info = iterator.Next(); - if (successor_block_info == nullptr) break; + DCHECK(orig_block->successor_blocks.empty()); // Empty after the swap() above. 
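With successor_blocks now owned by value as an ArenaVector, SplitBlock hands the whole list to the bottom block with swap() and then repoints each successor's predecessor edge, as the loop below does with ErasePredecessor plus push_back. A rough equivalent over std::vector, assuming blocks can be looked up by id in a flat array and using a single in-place replacement instead of erase-then-append:

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

using BlockId = uint16_t;

struct Block {
  BlockId id = 0;
  std::vector<BlockId> predecessors;
  std::vector<BlockId> successors;
};

// When `bottom` takes over `orig`'s successors, each successor must stop
// listing `orig` as a predecessor and list `bottom` instead.
void MoveSuccessors(Block& orig, Block& bottom, std::vector<Block>& blocks) {
  bottom.successors.swap(orig.successors);   // orig keeps bottom's (empty) old list
  for (BlockId succ_id : bottom.successors) {
    auto& preds = blocks[succ_id].predecessors;
    auto pos = std::find(preds.begin(), preds.end(), orig.id);
    if (pos != preds.end()) {
      *pos = bottom.id;                      // repoint the edge
    }
  }
}
```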
+ for (SuccessorBlockInfo* successor_block_info : bottom_block->successor_blocks) { BasicBlock* bb = GetBasicBlock(successor_block_info->block); if (bb != nullptr) { - bb->predecessors->Delete(orig_block->id); - bb->predecessors->Insert(bottom_block->id); + bb->ErasePredecessor(orig_block->id); + bb->predecessors.push_back(bottom_block->id); } } } @@ -258,9 +258,9 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, DCHECK_EQ(insn->offset, bottom_block->start_offset); DCHECK(static_cast<int>(insn->dalvikInsn.opcode) == kMirOpCheck || !MIR::DecodedInstruction::IsPseudoMirOp(insn->dalvikInsn.opcode)); - DCHECK_EQ(dex_pc_to_block_map_.Get(insn->offset), orig_block->id); + DCHECK_EQ(dex_pc_to_block_map_[insn->offset], orig_block->id); MIR* p = insn; - dex_pc_to_block_map_.Put(p->offset, bottom_block->id); + dex_pc_to_block_map_[p->offset] = bottom_block->id; while (p != bottom_block->last_mir_insn) { p = p->next; DCHECK(p != nullptr); @@ -273,8 +273,8 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, * the first in a BasicBlock, we can't hit it here. */ if ((opcode == kMirOpCheck) || !MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { - DCHECK_EQ(dex_pc_to_block_map_.Get(p->offset), orig_block->id); - dex_pc_to_block_map_.Put(p->offset, bottom_block->id); + DCHECK_EQ(dex_pc_to_block_map_[p->offset], orig_block->id); + dex_pc_to_block_map_[p->offset] = bottom_block->id; } } @@ -295,8 +295,8 @@ BasicBlock* MIRGraph::FindBlock(DexOffset code_offset, bool split, bool create, return NULL; } - int block_id = dex_pc_to_block_map_.Get(code_offset); - BasicBlock* bb = (block_id == 0) ? NULL : block_list_.Get(block_id); + int block_id = dex_pc_to_block_map_[code_offset]; + BasicBlock* bb = GetBasicBlock(block_id); if ((bb != NULL) && (bb->start_offset == code_offset)) { // Does this containing block start with the desired instruction? @@ -314,10 +314,9 @@ BasicBlock* MIRGraph::FindBlock(DexOffset code_offset, bool split, bool create, } // Create a new block. - bb = NewMemBB(kDalvikByteCode, num_blocks_++); - block_list_.Insert(bb); + bb = CreateNewBB(kDalvikByteCode); bb->start_offset = code_offset; - dex_pc_to_block_map_.Put(bb->start_offset, bb->id); + dex_pc_to_block_map_[bb->start_offset] = bb->id; return bb; } @@ -457,7 +456,7 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffs BasicBlock* taken_block = FindBlock(target, /* split */ true, /* create */ true, /* immed_pred_block_p */ &cur_block); cur_block->taken = taken_block->id; - taken_block->predecessors->Insert(cur_block->id); + taken_block->predecessors.push_back(cur_block->id); /* Always terminate the current block for conditional branches */ if (flags & Instruction::kContinue) { @@ -480,7 +479,7 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffs /* immed_pred_block_p */ &cur_block); cur_block->fall_through = fallthrough_block->id; - fallthrough_block->predecessors->Insert(cur_block->id); + fallthrough_block->predecessors.push_back(cur_block->id); } else if (code_ptr < code_end) { FindBlock(cur_offset + width, /* split */ false, /* create */ true, /* immed_pred_block_p */ NULL); @@ -539,8 +538,7 @@ BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffs } cur_block->successor_block_list_type = (insn->dalvikInsn.opcode == Instruction::PACKED_SWITCH) ? 
kPackedSwitch : kSparseSwitch; - cur_block->successor_blocks = - new (arena_) GrowableArray<SuccessorBlockInfo*>(arena_, size, kGrowableArraySuccessorBlocks); + cur_block->successor_blocks.reserve(size); for (i = 0; i < size; i++) { BasicBlock* case_block = FindBlock(cur_offset + target_table[i], /* split */ true, @@ -552,15 +550,15 @@ BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffs successor_block_info->key = (insn->dalvikInsn.opcode == Instruction::PACKED_SWITCH) ? first_key + i : keyTable[i]; - cur_block->successor_blocks->Insert(successor_block_info); - case_block->predecessors->Insert(cur_block->id); + cur_block->successor_blocks.push_back(successor_block_info); + case_block->predecessors.push_back(cur_block->id); } /* Fall-through case */ BasicBlock* fallthrough_block = FindBlock(cur_offset + width, /* split */ false, /* create */ true, /* immed_pred_block_p */ NULL); cur_block->fall_through = fallthrough_block->id; - fallthrough_block->predecessors->Insert(cur_block->id); + fallthrough_block->predecessors.push_back(cur_block->id); return cur_block; } @@ -593,8 +591,6 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse } if (cur_block->successor_block_list_type == kNotUsed) { cur_block->successor_block_list_type = kCatch; - cur_block->successor_blocks = new (arena_) GrowableArray<SuccessorBlockInfo*>( - arena_, 2, kGrowableArraySuccessorBlocks); } catch_block->catch_entry = true; if (kIsDebugBuild) { @@ -604,17 +600,16 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse (arena_->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor)); successor_block_info->block = catch_block->id; successor_block_info->key = iterator.GetHandlerTypeIndex(); - cur_block->successor_blocks->Insert(successor_block_info); - catch_block->predecessors->Insert(cur_block->id); + cur_block->successor_blocks.push_back(successor_block_info); + catch_block->predecessors.push_back(cur_block->id); } in_try_block = (cur_block->successor_block_list_type != kNotUsed); } if (!in_try_block && build_all_edges) { - BasicBlock* eh_block = NewMemBB(kExceptionHandling, num_blocks_++); + BasicBlock* eh_block = CreateNewBB(kExceptionHandling); cur_block->taken = eh_block->id; - block_list_.Insert(eh_block); eh_block->start_offset = cur_offset; - eh_block->predecessors->Insert(cur_block->id); + eh_block->predecessors.push_back(cur_block->id); } if (is_throw) { @@ -657,11 +652,10 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse * Note also that the dex_pc_to_block_map_ entry for the potentially * throwing instruction will refer to the original basic block. */ - BasicBlock* new_block = NewMemBB(kDalvikByteCode, num_blocks_++); - block_list_.Insert(new_block); + BasicBlock* new_block = CreateNewBB(kDalvikByteCode); new_block->start_offset = insn->offset; cur_block->fall_through = new_block->id; - new_block->predecessors->Insert(cur_block->id); + new_block->predecessors.push_back(cur_block->id); MIR* new_insn = NewMIR(); *new_insn = *insn; insn->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheck); @@ -689,8 +683,8 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ // TODO: need to rework expansion of block list & try_block_addr when inlining activated. // TUNING: use better estimate of basic blocks for following resize. 
- block_list_.Resize(block_list_.Size() + current_code_item_->insns_size_in_code_units_); - dex_pc_to_block_map_.SetSize(dex_pc_to_block_map_.Size() + current_code_item_->insns_size_in_code_units_); + block_list_.reserve(block_list_.size() + current_code_item_->insns_size_in_code_units_); + dex_pc_to_block_map_.resize(dex_pc_to_block_map_.size() + current_code_item_->insns_size_in_code_units_); // TODO: replace with explicit resize routine. Using automatic extension side effect for now. try_block_addr_->SetBit(current_code_item_->insns_size_in_code_units_); @@ -702,14 +696,11 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ DCHECK(exit_block_ == NULL); DCHECK_EQ(num_blocks_, 0U); // Use id 0 to represent a null block. - BasicBlock* null_block = NewMemBB(kNullBlock, num_blocks_++); + BasicBlock* null_block = CreateNewBB(kNullBlock); DCHECK_EQ(null_block->id, NullBasicBlockId); null_block->hidden = true; - block_list_.Insert(null_block); - entry_block_ = NewMemBB(kEntryBlock, num_blocks_++); - block_list_.Insert(entry_block_); - exit_block_ = NewMemBB(kExitBlock, num_blocks_++); - block_list_.Insert(exit_block_); + entry_block_ = CreateNewBB(kEntryBlock); + exit_block_ = CreateNewBB(kExitBlock); // TODO: deprecate all "cu->" fields; move what's left to wherever CompilationUnit is allocated. cu_->dex_file = &dex_file; cu_->class_def_idx = class_def_idx; @@ -717,7 +708,6 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ cu_->access_flags = access_flags; cu_->invoke_type = invoke_type; cu_->shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx)); - cu_->code_item = current_code_item_; } else { UNIMPLEMENTED(FATAL) << "Nested inlining not implemented."; /* @@ -727,13 +717,12 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ } /* Current block to record parsed instructions */ - BasicBlock* cur_block = NewMemBB(kDalvikByteCode, num_blocks_++); + BasicBlock* cur_block = CreateNewBB(kDalvikByteCode); DCHECK_EQ(current_offset_, 0U); cur_block->start_offset = current_offset_; - block_list_.Insert(cur_block); // TODO: for inlining support, insert at the insert point rather than entry block. entry_block_->fall_through = cur_block->id; - cur_block->predecessors->Insert(entry_block_->id); + cur_block->predecessors.push_back(entry_block_->id); /* Identify code range in try blocks and set up the empty catch blocks */ ProcessTryCatchBlocks(); @@ -791,7 +780,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ } // Associate the starting dex_pc for this opcode with its containing basic block. - dex_pc_to_block_map_.Put(insn->offset, cur_block->id); + dex_pc_to_block_map_[insn->offset] = cur_block->id; code_ptr += width; @@ -801,7 +790,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ } else if (flags & Instruction::kReturn) { cur_block->terminated_by_return = true; cur_block->fall_through = exit_block_->id; - exit_block_->predecessors->Insert(cur_block->id); + exit_block_->predecessors.push_back(cur_block->id); /* * Terminate the current block if there are instructions * afterwards. 
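InlineMethod reserves block_list_ and grows dex_pc_to_block_map_ up front, then records the containing block id for every parsed instruction offset; FindBlock treats the map as a lookup cache, with block id 0 reserved for "no block". A minimal sketch of that cache, assuming the map is sized once from the code-unit count (the patch instead resizes it relative to its current size to leave room for inlining):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

using BlockId = uint16_t;
constexpr BlockId kNullBlockId = 0;  // id 0 is reserved for "no block"

// A dex-pc-indexed lookup cache: size it once to the code size, then record
// the containing block id as instructions are parsed.
struct DexPcToBlockMap {
  std::vector<BlockId> map;

  void Init(std::size_t code_units) { map.assign(code_units, kNullBlockId); }
  void Record(uint32_t dex_pc, BlockId id) { map[dex_pc] = id; }
  BlockId Lookup(uint32_t dex_pc) const {
    return dex_pc < map.size() ? map[dex_pc] : kNullBlockId;
  }
};
```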
@@ -850,7 +839,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ if ((cur_block->fall_through == NullBasicBlockId) && (flags & Instruction::kContinue)) { cur_block->fall_through = next_block->id; - next_block->predecessors->Insert(cur_block->id); + next_block->predecessors.push_back(cur_block->id); } cur_block = next_block; } @@ -915,7 +904,7 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff int idx; for (idx = 0; idx < num_blocks; idx++) { - int block_idx = all_blocks ? idx : dfs_order_->Get(idx); + int block_idx = all_blocks ? idx : dfs_order_[idx]; BasicBlock* bb = GetBasicBlock(block_idx); if (bb == NULL) continue; if (bb->block_type == kDead) continue; @@ -971,23 +960,17 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff fprintf(file, " succ%04x_%d [shape=%s,label = \"{ \\\n", bb->start_offset, bb->id, (bb->successor_block_list_type == kCatch) ? "Mrecord" : "record"); - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bb->successor_blocks); - SuccessorBlockInfo* successor_block_info = iterator.Next(); + int last_succ_id = static_cast<int>(bb->successor_blocks.size() - 1u); int succ_id = 0; - while (true) { - if (successor_block_info == NULL) break; - + for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) { BasicBlock* dest_block = GetBasicBlock(successor_block_info->block); - SuccessorBlockInfo *next_successor_block_info = iterator.Next(); - fprintf(file, " {<f%d> %04x: %04x\\l}%s\\\n", - succ_id++, + succ_id, successor_block_info->key, dest_block->start_offset, - (next_successor_block_info != NULL) ? " | " : " "); - - successor_block_info = next_successor_block_info; + (succ_id != last_succ_id) ? " | " : " "); + ++succ_id; } fprintf(file, " }\"];\n\n"); @@ -996,13 +979,8 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff block_name1, bb->start_offset, bb->id); // Link the successor pseudo-block with all of its potential targets. - GrowableArray<SuccessorBlockInfo*>::Iterator iter(bb->successor_blocks); - succ_id = 0; - while (true) { - SuccessorBlockInfo* successor_block_info = iter.Next(); - if (successor_block_info == NULL) break; - + for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) { BasicBlock* dest_block = GetBasicBlock(successor_block_info->block); GetBlockName(dest_block, block_name2); @@ -1425,9 +1403,11 @@ char* MIRGraph::GetDalvikDisassembly(const MIR* mir) { opcode = insn.opcode; } else if (opcode == kMirOpNop) { str.append("["); - // Recover original opcode. - insn.opcode = Instruction::At(current_code_item_->insns_ + mir->offset)->Opcode(); - opcode = insn.opcode; + if (mir->offset < current_code_item_->insns_size_in_code_units_) { + // Recover original opcode. + insn.opcode = Instruction::At(current_code_item_->insns_ + mir->offset)->Opcode(); + opcode = insn.opcode; + } nop = true; } int defs = (ssa_rep != NULL) ? 
ssa_rep->num_defs : 0; @@ -1603,7 +1583,6 @@ const char* MIRGraph::GetShortyFromTargetIdx(int target_idx) { /* Debug Utility - dump a compilation unit */ void MIRGraph::DumpMIRGraph() { - BasicBlock* bb; const char* block_type_names[] = { "Null Block", "Entry Block", @@ -1616,11 +1595,8 @@ void MIRGraph::DumpMIRGraph() { LOG(INFO) << "Compiling " << PrettyMethod(cu_->method_idx, *cu_->dex_file); LOG(INFO) << GetInsns(0) << " insns"; LOG(INFO) << GetNumBlocks() << " blocks in total"; - GrowableArray<BasicBlock*>::Iterator iterator(&block_list_); - while (true) { - bb = iterator.Next(); - if (bb == NULL) break; + for (BasicBlock* bb : block_list_) { LOG(INFO) << StringPrintf("Block %d (%s) (insn %04x - %04x%s)", bb->id, block_type_names[bb->block_type], @@ -1678,15 +1654,10 @@ MIR* MIRGraph::NewMIR() { // Allocate a new basic block. BasicBlock* MIRGraph::NewMemBB(BBType block_type, int block_id) { - BasicBlock* bb = new (arena_) BasicBlock(); + BasicBlock* bb = new (arena_) BasicBlock(block_id, block_type, arena_); - bb->block_type = block_type; - bb->id = block_id; // TUNING: better estimate of the exit block predecessors? - bb->predecessors = new (arena_) GrowableArray<BasicBlockId>(arena_, - (block_type == kExitBlock) ? 2048 : 2, - kGrowableArrayPredecessors); - bb->successor_block_list_type = kNotUsed; + bb->predecessors.reserve((block_type == kExitBlock) ? 2048 : 2); block_id_map_.Put(block_id, block_id); return bb; } @@ -1699,16 +1670,12 @@ void MIRGraph::InitializeConstantPropagation() { void MIRGraph::InitializeMethodUses() { // The gate starts by initializing the use counts. int num_ssa_regs = GetNumSSARegs(); - use_counts_.Resize(num_ssa_regs + 32); - raw_use_counts_.Resize(num_ssa_regs + 32); - // Resize does not actually reset the number of used, so reset before initialization. - use_counts_.Reset(); - raw_use_counts_.Reset(); - // Initialize list. - for (int i = 0; i < num_ssa_regs; i++) { - use_counts_.Insert(0); - raw_use_counts_.Insert(0); - } + use_counts_.clear(); + use_counts_.reserve(num_ssa_regs + 32); + use_counts_.resize(num_ssa_regs, 0u); + raw_use_counts_.clear(); + raw_use_counts_.reserve(num_ssa_regs + 32); + raw_use_counts_.resize(num_ssa_regs, 0u); } void MIRGraph::SSATransformationStart() { @@ -1717,9 +1684,6 @@ void MIRGraph::SSATransformationStart() { temp_bit_vector_size_ = GetNumOfCodeAndTempVRs(); temp_bit_vector_ = new (temp_scoped_alloc_.get()) ArenaBitVector( temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapRegisterV); - - // Update the maximum number of reachable blocks. - max_num_reachable_blocks_ = num_reachable_blocks_; } void MIRGraph::SSATransformationEnd() { @@ -1732,6 +1696,36 @@ void MIRGraph::SSATransformationEnd() { temp_bit_vector_ = nullptr; DCHECK(temp_scoped_alloc_.get() != nullptr); temp_scoped_alloc_.reset(); + + // Update the maximum number of reachable blocks. + max_num_reachable_blocks_ = num_reachable_blocks_; +} + +size_t MIRGraph::GetNumDalvikInsns() const { + size_t cumulative_size = 0u; + bool counted_current_item = false; + const uint8_t size_for_null_code_item = 2u; + + for (auto it : m_units_) { + const DexFile::CodeItem* code_item = it->GetCodeItem(); + // Even if the code item is null, we still count non-zero value so that + // each m_unit is counted as having impact. + cumulative_size += (code_item == nullptr ? 
+ size_for_null_code_item : code_item->insns_size_in_code_units_); + if (code_item == current_code_item_) { + counted_current_item = true; + } + } + + // If the current code item was not counted yet, count it now. + // This can happen for example in unit tests where some fields like m_units_ + // are not initialized. + if (counted_current_item == false) { + cumulative_size += (current_code_item_ == nullptr ? + size_for_null_code_item : current_code_item_->insns_size_in_code_units_); + } + + return cumulative_size; } static BasicBlock* SelectTopologicalSortOrderFallBack( @@ -1800,9 +1794,9 @@ static void ComputeUnvisitedReachableFrom(MIRGraph* mir_graph, BasicBlockId bb_i tmp_stack->pop_back(); BasicBlock* current_bb = mir_graph->GetBasicBlock(current_id); DCHECK(current_bb != nullptr); - GrowableArray<BasicBlockId>::Iterator iter(current_bb->predecessors); - BasicBlock* pred_bb = mir_graph->GetBasicBlock(iter.Next()); - for ( ; pred_bb != nullptr; pred_bb = mir_graph->GetBasicBlock(iter.Next())) { + for (BasicBlockId pred_id : current_bb->predecessors) { + BasicBlock* pred_bb = mir_graph->GetBasicBlock(pred_id); + DCHECK(pred_bb != nullptr); if (!pred_bb->visited && !reachable->IsBitSet(pred_bb->id)) { reachable->SetBit(pred_bb->id); tmp_stack->push_back(pred_bb->id); @@ -1823,36 +1817,27 @@ void MIRGraph::ComputeTopologicalSortOrder() { loop_exit_blocks.ClearAllBits(); // Count the number of blocks to process and add the entry block(s). - GrowableArray<BasicBlock*>::Iterator iterator(&block_list_); unsigned int num_blocks_to_process = 0u; - for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) { + for (BasicBlock* bb : block_list_) { if (bb->hidden == true) { continue; } num_blocks_to_process += 1u; - if (bb->predecessors->Size() == 0u) { + if (bb->predecessors.size() == 0u) { // Add entry block to the queue. q.push(bb); } } - // Create the topological order if need be. - if (topological_order_ == nullptr) { - topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, num_blocks); - topological_order_loop_ends_ = new (arena_) GrowableArray<uint16_t>(arena_, num_blocks); - topological_order_indexes_ = new (arena_) GrowableArray<uint16_t>(arena_, num_blocks); - } - topological_order_->Reset(); - topological_order_loop_ends_->Reset(); - topological_order_indexes_->Reset(); - topological_order_loop_ends_->Resize(num_blocks); - topological_order_indexes_->Resize(num_blocks); - for (BasicBlockId i = 0; i != num_blocks; ++i) { - topological_order_loop_ends_->Insert(0u); - topological_order_indexes_->Insert(static_cast<uint16_t>(-1)); - } + // Clear the topological order arrays. + topological_order_.clear(); + topological_order_.reserve(num_blocks); + topological_order_loop_ends_.clear(); + topological_order_loop_ends_.resize(num_blocks, 0u); + topological_order_indexes_.clear(); + topological_order_indexes_.resize(num_blocks, static_cast<uint16_t>(-1)); // Mark all blocks as unvisited. ClearAllVisitedFlags(); @@ -1875,8 +1860,8 @@ void MIRGraph::ComputeTopologicalSortOrder() { if (bb->visited) { // Loop head: it was already processed, mark end and copy exit blocks to the queue. 
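ComputeTopologicalSortOrder keeps three parallel structures: the order itself, a per-block index into that order (0xffff when not yet placed), and, for loop heads, the past-the-end order index of the loop body, which is written when the head is revisited as in the hunk above. A compact sketch of that bookkeeping with plain vectors:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Per-block bookkeeping for a loop-aware topological order:
//  - order        : block ids in topological order
//  - index_of[bb] : position of bb in `order` (0xffff if not placed)
//  - loop_end_of  : for a loop head's position, past-the-end position of its loop body
struct TopologicalOrder {
  std::vector<uint16_t> order;
  std::vector<uint16_t> index_of;
  std::vector<uint16_t> loop_end_of;

  void Init(std::size_t num_blocks) {
    order.clear();
    order.reserve(num_blocks);
    index_of.assign(num_blocks, 0xffffu);
    loop_end_of.assign(num_blocks, 0u);
  }

  void Place(uint16_t bb) {
    index_of[bb] = static_cast<uint16_t>(order.size());
    order.push_back(bb);
  }

  // Called when a loop head's body has been fully emitted.
  void MarkLoopEnd(uint16_t loop_head) {
    loop_end_of[index_of[loop_head]] = static_cast<uint16_t>(order.size());
  }
};
```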
DCHECK(q.empty()) << PrettyMethod(cu_->method_idx, *cu_->dex_file); - uint16_t idx = static_cast<uint16_t>(topological_order_->Size()); - topological_order_loop_ends_->Put(topological_order_indexes_->Get(bb->id), idx); + uint16_t idx = static_cast<uint16_t>(topological_order_.size()); + topological_order_loop_ends_[topological_order_indexes_[bb->id]] = idx; DCHECK_EQ(loop_head_stack.back(), bb->id); loop_head_stack.pop_back(); ArenaBitVector* reachable = @@ -1919,15 +1904,16 @@ void MIRGraph::ComputeTopologicalSortOrder() { continue; } - GrowableArray<BasicBlockId>::Iterator pred_iter(candidate->predecessors); - BasicBlock* pred_bb = GetBasicBlock(pred_iter.Next()); - for ( ; pred_bb != nullptr; pred_bb = GetBasicBlock(pred_iter.Next())) { + for (BasicBlockId pred_id : candidate->predecessors) { + BasicBlock* pred_bb = GetBasicBlock(pred_id); + DCHECK(pred_bb != nullptr); if (pred_bb != candidate && !pred_bb->visited && !pred_bb->dominators->IsBitSet(candidate->id)) { - break; // Keep non-null pred_bb to indicate failure. + candidate = nullptr; // Set candidate to null to indicate failure. + break; } } - if (pred_bb == nullptr) { + if (candidate != nullptr) { bb = candidate; break; } @@ -1947,9 +1933,9 @@ void MIRGraph::ComputeTopologicalSortOrder() { bb->visited = true; // Now add the basic block. - uint16_t idx = static_cast<uint16_t>(topological_order_->Size()); - topological_order_indexes_->Put(bb->id, idx); - topological_order_->Insert(bb->id); + uint16_t idx = static_cast<uint16_t>(topological_order_.size()); + topological_order_indexes_[bb->id] = idx; + topological_order_.push_back(bb->id); // Update visited_cnt_values for children. ChildBlockIterator succIter(bb, this); @@ -1961,7 +1947,7 @@ void MIRGraph::ComputeTopologicalSortOrder() { // One more predecessor was visited. visited_cnt_values[successor->id] += 1u; - if (visited_cnt_values[successor->id] == successor->predecessors->Size()) { + if (visited_cnt_values[successor->id] == successor->predecessors.size()) { if (loop_head_stack.empty() || loop_head_reachable_from[loop_head_stack.back()]->IsBitSet(successor->id)) { q.push(successor); @@ -1974,8 +1960,8 @@ void MIRGraph::ComputeTopologicalSortOrder() { } // Prepare the loop head stack for iteration. - topological_order_loop_head_stack_ = - new (arena_) GrowableArray<std::pair<uint16_t, bool>>(arena_, max_nested_loops); + topological_order_loop_head_stack_.clear(); + topological_order_loop_head_stack_.reserve(max_nested_loops); } bool BasicBlock::IsExceptionBlock() const { @@ -1992,8 +1978,8 @@ bool MIRGraph::HasSuspendTestBetween(BasicBlock* source, BasicBlockId target_id) return false; int idx; - for (idx = gen_suspend_test_list_.Size() - 1; idx >= 0; idx--) { - BasicBlock* bb = gen_suspend_test_list_.Get(idx); + for (idx = gen_suspend_test_list_.size() - 1; idx >= 0; idx--) { + BasicBlock* bb = gen_suspend_test_list_[idx]; if (bb == source) return true; // The block has been inserted by a suspend check before. if (source->dominators->IsBitSet(bb->id) && bb->dominators->IsBitSet(target_id)) @@ -2009,7 +1995,7 @@ ChildBlockIterator::ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph) // Check if we actually do have successors. if (basic_block_ != 0 && basic_block_->successor_block_list_type != kNotUsed) { have_successors_ = true; - successor_iter_.Reset(basic_block_->successor_blocks); + successor_iter_ = basic_block_->successor_blocks.cbegin(); } } @@ -2042,9 +2028,10 @@ BasicBlock* ChildBlockIterator::Next() { // We visited both taken and fallthrough. 
Now check if we have successors we need to visit. if (have_successors_ == true) { // Get information about next successor block. - for (SuccessorBlockInfo* successor_block_info = successor_iter_.Next(); - successor_block_info != nullptr; - successor_block_info = successor_iter_.Next()) { + auto end = basic_block_->successor_blocks.cend(); + while (successor_iter_ != end) { + SuccessorBlockInfo* successor_block_info = *successor_iter_; + ++successor_iter_; // If block was replaced by zero block, take next one. if (successor_block_info->block != NullBasicBlockId) { return mir_graph_->GetBasicBlock(successor_block_info->block); @@ -2075,17 +2062,12 @@ BasicBlock* BasicBlock::Copy(MIRGraph* mir_graph) { result_bb->successor_block_list_type = successor_block_list_type; if (result_bb->successor_block_list_type != kNotUsed) { - size_t size = successor_blocks->Size(); - result_bb->successor_blocks = new (arena) GrowableArray<SuccessorBlockInfo*>(arena, size, kGrowableArraySuccessorBlocks); - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(successor_blocks); - while (true) { - SuccessorBlockInfo* sbi_old = iterator.Next(); - if (sbi_old == nullptr) { - break; - } - SuccessorBlockInfo* sbi_new = static_cast<SuccessorBlockInfo*>(arena->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor)); + result_bb->successor_blocks.reserve(successor_blocks.size()); + for (SuccessorBlockInfo* sbi_old : successor_blocks) { + SuccessorBlockInfo* sbi_new = static_cast<SuccessorBlockInfo*>( + arena->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor)); memcpy(sbi_new, sbi_old, sizeof(SuccessorBlockInfo)); - result_bb->successor_blocks->Insert(sbi_new); + result_bb->successor_blocks.push_back(sbi_new); } } @@ -2244,14 +2226,10 @@ void BasicBlock::Hide(CompilationUnit* c_unit) { first_mir_insn = nullptr; last_mir_insn = nullptr; - GrowableArray<BasicBlockId>::Iterator iterator(predecessors); - MIRGraph* mir_graph = c_unit->mir_graph.get(); - while (true) { - BasicBlock* pred_bb = mir_graph->GetBasicBlock(iterator.Next()); - if (pred_bb == nullptr) { - break; - } + for (BasicBlockId pred_id : predecessors) { + BasicBlock* pred_bb = mir_graph->GetBasicBlock(pred_id); + DCHECK(pred_bb != nullptr); // Sadly we have to go through the children by hand here. pred_bb->ReplaceChild(id, NullBasicBlockId); @@ -2261,8 +2239,8 @@ void BasicBlock::Hide(CompilationUnit* c_unit) { ChildBlockIterator successorChildIter(this, mir_graph); for (BasicBlock* childPtr = successorChildIter.Next(); childPtr != 0; childPtr = successorChildIter.Next()) { - // Replace child with null child. - childPtr->predecessors->Delete(id); + // Erase this predecessor from child. + childPtr->ErasePredecessor(id); } // Remove link to children. 
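Hide() above drops this block from each child's predecessor list, and the following hunks replace the iterator-based UpdatePredecessor with std::find over the predecessor vector, adding ErasePredecessor alongside it. Both helpers reduce to a few lines; a sketch over std::vector (the patch DCHECKs that the erased edge exists instead of tolerating a miss):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

using BlockId = uint16_t;

// Predecessor lists are plain vectors, so edge removal and retargeting are
// std::find plus erase or assignment; the first match is the edge.
void ErasePredecessor(std::vector<BlockId>& preds, BlockId old_pred) {
  auto pos = std::find(preds.begin(), preds.end(), old_pred);
  if (pos != preds.end()) {
    preds.erase(pos);
  }
}

void UpdatePredecessor(std::vector<BlockId>& preds, BlockId old_pred, BlockId new_pred) {
  auto pos = std::find(preds.begin(), preds.end(), old_pred);
  if (pos != preds.end()) {
    *pos = new_pred;            // retarget the existing edge
  } else {
    preds.push_back(new_pred);  // no such edge yet: add one
  }
}
```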
@@ -2328,12 +2306,7 @@ bool BasicBlock::ReplaceChild(BasicBlockId old_bb, BasicBlockId new_bb) { } if (successor_block_list_type != kNotUsed) { - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(successor_blocks); - while (true) { - SuccessorBlockInfo* successor_block_info = iterator.Next(); - if (successor_block_info == nullptr) { - break; - } + for (SuccessorBlockInfo* successor_block_info : successor_blocks) { if (successor_block_info->block == old_bb) { successor_block_info->block = new_bb; found = true; @@ -2344,28 +2317,20 @@ bool BasicBlock::ReplaceChild(BasicBlockId old_bb, BasicBlockId new_bb) { return found; } -void BasicBlock::UpdatePredecessor(BasicBlockId old_parent, BasicBlockId new_parent) { - GrowableArray<BasicBlockId>::Iterator iterator(predecessors); - bool found = false; - - while (true) { - BasicBlockId pred_bb_id = iterator.Next(); - - if (pred_bb_id == NullBasicBlockId) { - break; - } - - if (pred_bb_id == old_parent) { - size_t idx = iterator.GetIndex() - 1; - predecessors->Put(idx, new_parent); - found = true; - break; - } - } +void BasicBlock::ErasePredecessor(BasicBlockId old_pred) { + auto pos = std::find(predecessors.begin(), predecessors.end(), old_pred); + DCHECK(pos != predecessors.end()); + predecessors.erase(pos); +} - // If not found, add it. - if (found == false) { - predecessors->Insert(new_parent); +void BasicBlock::UpdatePredecessor(BasicBlockId old_pred, BasicBlockId new_pred) { + DCHECK_NE(new_pred, NullBasicBlockId); + auto pos = std::find(predecessors.begin(), predecessors.end(), old_pred); + if (pos != predecessors.end()) { + *pos = new_pred; + } else { + // If not found, add it. + predecessors.push_back(new_pred); } } @@ -2373,7 +2338,7 @@ void BasicBlock::UpdatePredecessor(BasicBlockId old_parent, BasicBlockId new_par // post-incremented. 
BasicBlock* MIRGraph::CreateNewBB(BBType block_type) { BasicBlock* res = NewMemBB(block_type, num_blocks_++); - block_list_.Insert(res); + block_list_.push_back(res); return res; } @@ -2383,7 +2348,7 @@ void MIRGraph::CalculateBasicBlockInformation() { } void MIRGraph::InitializeBasicBlockData() { - num_blocks_ = block_list_.Size(); + num_blocks_ = block_list_.size(); } int MIR::DecodedInstruction::FlagsOf() const { diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 078970d611..f14b1876e8 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -27,7 +27,6 @@ #include "mir_field_info.h" #include "mir_method_info.h" #include "utils/arena_bit_vector.h" -#include "utils/growable_array.h" #include "utils/arena_containers.h" #include "utils/scoped_arena_containers.h" #include "reg_location.h" @@ -37,40 +36,6 @@ namespace art { class GlobalValueNumbering; -enum InstructionAnalysisAttributePos { - kUninterestingOp = 0, - kArithmeticOp, - kFPOp, - kSingleOp, - kDoubleOp, - kIntOp, - kLongOp, - kBranchOp, - kInvokeOp, - kArrayOp, - kHeavyweightOp, - kSimpleConstOp, - kMoveOp, - kSwitch -}; - -#define AN_NONE (1 << kUninterestingOp) -#define AN_MATH (1 << kArithmeticOp) -#define AN_FP (1 << kFPOp) -#define AN_LONG (1 << kLongOp) -#define AN_INT (1 << kIntOp) -#define AN_SINGLE (1 << kSingleOp) -#define AN_DOUBLE (1 << kDoubleOp) -#define AN_FLOATMATH (1 << kFPOp) -#define AN_BRANCH (1 << kBranchOp) -#define AN_INVOKE (1 << kInvokeOp) -#define AN_ARRAYOP (1 << kArrayOp) -#define AN_HEAVYWEIGHT (1 << kHeavyweightOp) -#define AN_SIMPLECONST (1 << kSimpleConstOp) -#define AN_MOVE (1 << kMoveOp) -#define AN_SWITCH (1 << kSwitch) -#define AN_COMPUTATIONAL (AN_MATH | AN_ARRAYOP | AN_MOVE | AN_SIMPLECONST) - enum DataFlowAttributePos { kUA = 0, kUB, @@ -394,6 +359,17 @@ struct MIR { struct SuccessorBlockInfo; struct BasicBlock { + BasicBlock(BasicBlockId block_id, BBType type, ArenaAllocator* allocator) + : id(block_id), + dfs_id(), start_offset(), fall_through(), taken(), i_dom(), nesting_depth(), + block_type(type), + successor_block_list_type(kNotUsed), + visited(), hidden(), catch_entry(), explicit_throw(), conditional_branch(), + terminated_by_return(), dominates_return(), use_lvn(), first_mir_insn(), + last_mir_insn(), data_flow_info(), dominators(), i_dominated(), dom_frontier(), + predecessors(allocator->Adapter(kArenaAllocBBPredecessors)), + successor_blocks(allocator->Adapter(kArenaAllocSuccessor)) { + } BasicBlockId id; BasicBlockId dfs_id; NarrowDexOffset start_offset; // Offset in code units. @@ -417,8 +393,8 @@ struct BasicBlock { ArenaBitVector* dominators; ArenaBitVector* i_dominated; // Set nodes being immediately dominated. ArenaBitVector* dom_frontier; // Dominance frontier. - GrowableArray<BasicBlockId>* predecessors; - GrowableArray<SuccessorBlockInfo*>* successor_blocks; + ArenaVector<BasicBlockId> predecessors; + ArenaVector<SuccessorBlockInfo*> successor_blocks; void AppendMIR(MIR* mir); void AppendMIRList(MIR* first_list_mir, MIR* last_list_mir); @@ -446,7 +422,7 @@ struct BasicBlock { * @brief Hide the BasicBlock. * @details Set it to kDalvikByteCode, set hidden to true, remove all MIRs, * remove itself from any predecessor edges, remove itself from any - * child's predecessor growable array. + * child's predecessor array. */ void Hide(CompilationUnit* c_unit); @@ -461,7 +437,12 @@ struct BasicBlock { bool ReplaceChild(BasicBlockId old_bb, BasicBlockId new_bb); /** - * @brief Update the predecessor growable array from old_pred to new_pred. 
+ * @brief Erase the predecessor old_pred. + */ + void ErasePredecessor(BasicBlockId old_pred); + + /** + * @brief Update the predecessor array from old_pred to new_pred. */ void UpdatePredecessor(BasicBlockId old_pred, BasicBlockId new_pred); @@ -512,7 +493,7 @@ class ChildBlockIterator { bool visited_fallthrough_; bool visited_taken_; bool have_successors_; - GrowableArray<SuccessorBlockInfo*>::Iterator successor_iter_; + ArenaVector<SuccessorBlockInfo*>::const_iterator successor_iter_; }; /* @@ -544,7 +525,7 @@ const RegLocation bad_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0, RegStorage class MIRGraph { public: MIRGraph(CompilationUnit* cu, ArenaAllocator* arena); - ~MIRGraph(); + virtual ~MIRGraph(); /* * Examine the graph to determine whether it's worthwile to spend the time compiling @@ -598,9 +579,12 @@ class MIRGraph { return num_blocks_; } - size_t GetNumDalvikInsns() const { - return cu_->code_item->insns_size_in_code_units_; - } + /** + * @brief Provides the total size in code units of all instructions in MIRGraph. + * @details Includes the sizes of all methods in compilation unit. + * @return Returns the cumulative sum of all insn sizes (in code units). + */ + size_t GetNumDalvikInsns() const; ArenaBitVector* GetTryBlockAddr() const { return try_block_addr_; @@ -615,26 +599,27 @@ class MIRGraph { } BasicBlock* GetBasicBlock(unsigned int block_id) const { - return (block_id == NullBasicBlockId) ? NULL : block_list_.Get(block_id); + DCHECK_LT(block_id, block_list_.size()); // NOTE: NullBasicBlockId is 0. + return (block_id == NullBasicBlockId) ? NULL : block_list_[block_id]; } size_t GetBasicBlockListCount() const { - return block_list_.Size(); + return block_list_.size(); } - GrowableArray<BasicBlock*>* GetBlockList() { - return &block_list_; + const ArenaVector<BasicBlock*>& GetBlockList() { + return block_list_; } - GrowableArray<BasicBlockId>* GetDfsOrder() { + const ArenaVector<BasicBlockId>& GetDfsOrder() { return dfs_order_; } - GrowableArray<BasicBlockId>* GetDfsPostOrder() { + const ArenaVector<BasicBlockId>& GetDfsPostOrder() { return dfs_post_order_; } - GrowableArray<BasicBlockId>* GetDomPostOrder() { + const ArenaVector<BasicBlockId>& GetDomPostOrder() { return dom_post_order_traversal_; } @@ -681,20 +666,20 @@ class MIRGraph { void DoCacheFieldLoweringInfo(); const MirIFieldLoweringInfo& GetIFieldLoweringInfo(MIR* mir) const { - DCHECK_LT(mir->meta.ifield_lowering_info, ifield_lowering_infos_.Size()); - return ifield_lowering_infos_.GetRawStorage()[mir->meta.ifield_lowering_info]; + DCHECK_LT(mir->meta.ifield_lowering_info, ifield_lowering_infos_.size()); + return ifield_lowering_infos_[mir->meta.ifield_lowering_info]; } const MirSFieldLoweringInfo& GetSFieldLoweringInfo(MIR* mir) const { - DCHECK_LT(mir->meta.sfield_lowering_info, sfield_lowering_infos_.Size()); - return sfield_lowering_infos_.GetRawStorage()[mir->meta.sfield_lowering_info]; + DCHECK_LT(mir->meta.sfield_lowering_info, sfield_lowering_infos_.size()); + return sfield_lowering_infos_[mir->meta.sfield_lowering_info]; } void DoCacheMethodLoweringInfo(); const MirMethodLoweringInfo& GetMethodLoweringInfo(MIR* mir) { - DCHECK_LT(mir->meta.method_lowering_info, method_lowering_infos_.Size()); - return method_lowering_infos_.GetRawStorage()[mir->meta.method_lowering_info]; + DCHECK_LT(mir->meta.method_lowering_info, method_lowering_infos_.size()); + return method_lowering_infos_[mir->meta.method_lowering_info]; } void ComputeInlineIFieldLoweringInfo(uint16_t field_idx, MIR* invoke, MIR* iget_or_iput); 
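A pattern that repeats through the header changes: getters that used to hand out GrowableArray pointers now return const references to ArenaVectors, and element accessors gain an explicit bounds DCHECK before indexing. A reduced illustration with std::vector and assert standing in for the arena container and DCHECK_LT:

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Returning a const reference to the underlying vector (instead of a pointer
// to a custom growable array) lets callers use range-based for loops while
// keeping the container read-only outside the graph.
class Graph {
 public:
  const std::vector<uint16_t>& GetDfsOrder() const { return dfs_order_; }

  uint32_t GetUseCount(std::size_t sreg) const {
    assert(sreg < use_counts_.size());   // stand-in for DCHECK_LT
    return use_counts_[sreg];
  }

 private:
  std::vector<uint16_t> dfs_order_;
  std::vector<uint32_t> use_counts_;
};
```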
@@ -707,24 +692,24 @@ class MIRGraph { void BasicBlockOptimization(); - GrowableArray<BasicBlockId>* GetTopologicalSortOrder() { - DCHECK(topological_order_ != nullptr); + const ArenaVector<BasicBlockId>& GetTopologicalSortOrder() { + DCHECK(!topological_order_.empty()); return topological_order_; } - GrowableArray<BasicBlockId>* GetTopologicalSortOrderLoopEnds() { - DCHECK(topological_order_loop_ends_ != nullptr); + const ArenaVector<BasicBlockId>& GetTopologicalSortOrderLoopEnds() { + DCHECK(!topological_order_loop_ends_.empty()); return topological_order_loop_ends_; } - GrowableArray<BasicBlockId>* GetTopologicalSortOrderIndexes() { - DCHECK(topological_order_indexes_ != nullptr); + const ArenaVector<BasicBlockId>& GetTopologicalSortOrderIndexes() { + DCHECK(!topological_order_indexes_.empty()); return topological_order_indexes_; } - GrowableArray<std::pair<uint16_t, bool>>* GetTopologicalSortOrderLoopHeadStack() { - DCHECK(topological_order_loop_head_stack_ != nullptr); - return topological_order_loop_head_stack_; + ArenaVector<std::pair<uint16_t, bool>>* GetTopologicalSortOrderLoopHeadStack() { + DCHECK(!topological_order_.empty()); // Checking the main array, not the stack. + return &topological_order_loop_head_stack_; } bool IsConst(int32_t s_reg) const { @@ -802,16 +787,19 @@ class MIRGraph { return num_reachable_blocks_; } - int GetUseCount(int sreg) const { - return use_counts_.Get(sreg); + uint32_t GetUseCount(int sreg) const { + DCHECK_LT(static_cast<size_t>(sreg), use_counts_.size()); + return use_counts_[sreg]; } - int GetRawUseCount(int sreg) const { - return raw_use_counts_.Get(sreg); + uint32_t GetRawUseCount(int sreg) const { + DCHECK_LT(static_cast<size_t>(sreg), raw_use_counts_.size()); + return raw_use_counts_[sreg]; } int GetSSASubscript(int ssa_reg) const { - return ssa_subscripts_->Get(ssa_reg); + DCHECK_LT(static_cast<size_t>(ssa_reg), ssa_subscripts_.size()); + return ssa_subscripts_[ssa_reg]; } RegLocation GetRawSrc(MIR* mir, int num) { @@ -1159,6 +1147,7 @@ class MIRGraph { void ComputeDefBlockMatrix(); void ComputeDominators(); void CompilerInitializeSSAConversion(); + virtual void InitializeBasicBlockDataFlow(); void InsertPhiNodes(); void DoDFSPreOrderSSARename(BasicBlock* block); @@ -1173,16 +1162,15 @@ class MIRGraph { ArenaSafeMap<unsigned int, unsigned int> block_id_map_; // Block collapse lookup cache. 
static const char* extended_mir_op_names_[kMirOpLast - kMirOpFirst]; - static const uint32_t analysis_attributes_[kMirOpLast]; void HandleSSADef(int* defs, int dalvik_reg, int reg_index); bool InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed); // Used for removing redudant suspend tests void AppendGenSuspendTestList(BasicBlock* bb) { - if (gen_suspend_test_list_.Size() == 0 || - gen_suspend_test_list_.Get(gen_suspend_test_list_.Size() - 1) != bb) { - gen_suspend_test_list_.Insert(bb); + if (gen_suspend_test_list_.size() == 0 || + gen_suspend_test_list_.back() != bb) { + gen_suspend_test_list_.push_back(bb); } } @@ -1203,7 +1191,6 @@ class MIRGraph { ArenaBitVector* live_in_v, const MIR::DecodedInstruction& d_insn); bool DoSSAConversion(BasicBlock* bb); - bool InvokeUsesMethodStar(MIR* mir); int ParseInsn(const uint16_t* code_ptr, MIR::DecodedInstruction* decoded_instruction); bool ContentIsInsn(const uint16_t* code_ptr); BasicBlock* SplitBlock(DexOffset code_offset, BasicBlock* orig_block, @@ -1248,30 +1235,30 @@ class MIRGraph { std::string* skip_message); CompilationUnit* const cu_; - GrowableArray<int>* ssa_base_vregs_; - GrowableArray<int>* ssa_subscripts_; + ArenaVector<int> ssa_base_vregs_; + ArenaVector<int> ssa_subscripts_; // Map original Dalvik virtual reg i to the current SSA name. int* vreg_to_ssa_map_; // length == method->registers_size int* ssa_last_defs_; // length == method->registers_size ArenaBitVector* is_constant_v_; // length == num_ssa_reg int* constant_values_; // length == num_ssa_reg // Use counts of ssa names. - GrowableArray<uint32_t> use_counts_; // Weighted by nesting depth - GrowableArray<uint32_t> raw_use_counts_; // Not weighted + ArenaVector<uint32_t> use_counts_; // Weighted by nesting depth + ArenaVector<uint32_t> raw_use_counts_; // Not weighted unsigned int num_reachable_blocks_; unsigned int max_num_reachable_blocks_; - GrowableArray<BasicBlockId>* dfs_order_; - GrowableArray<BasicBlockId>* dfs_post_order_; - GrowableArray<BasicBlockId>* dom_post_order_traversal_; - GrowableArray<BasicBlockId>* topological_order_; + ArenaVector<BasicBlockId> dfs_order_; + ArenaVector<BasicBlockId> dfs_post_order_; + ArenaVector<BasicBlockId> dom_post_order_traversal_; + ArenaVector<BasicBlockId> topological_order_; // Indexes in topological_order_ need to be only as big as the BasicBlockId. COMPILE_ASSERT(sizeof(BasicBlockId) == sizeof(uint16_t), assuming_16_bit_BasicBlockId); // For each loop head, remember the past-the-end index of the end of the loop. 0 if not loop head. - GrowableArray<uint16_t>* topological_order_loop_ends_; + ArenaVector<uint16_t> topological_order_loop_ends_; // Map BB ids to topological_order_ indexes. 0xffff if not included (hidden or null block). - GrowableArray<uint16_t>* topological_order_indexes_; + ArenaVector<uint16_t> topological_order_indexes_; // Stack of the loop head indexes and recalculation flags for RepeatingTopologicalSortIterator. - GrowableArray<std::pair<uint16_t, bool>>* topological_order_loop_head_stack_; + ArenaVector<std::pair<uint16_t, bool>> topological_order_loop_head_stack_; int* i_dom_list_; ArenaBitVector** def_block_matrix_; // original num registers x num_blocks. 
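AppendGenSuspendTestList above now uses std-style back() and push_back() instead of GrowableArray's Get(Size() - 1) and Insert(). The dedup-append idiom in isolation, as a self-contained sketch with an int standing in for BasicBlock*:

#include <cassert>
#include <vector>

// Append 'bb' only if it is not already the most recently appended entry.
// Mirrors MIRGraph::AppendGenSuspendTestList, with int standing in for BasicBlock*.
void AppendIfNotLast(std::vector<int>* list, int bb) {
  if (list->empty() || list->back() != bb) {
    list->push_back(bb);
  }
}

int main() {
  std::vector<int> gen_suspend_test_list;
  AppendIfNotLast(&gen_suspend_test_list, 3);
  AppendIfNotLast(&gen_suspend_test_list, 3);  // Duplicate of the last entry: skipped.
  AppendIfNotLast(&gen_suspend_test_list, 5);
  assert(gen_suspend_test_list.size() == 2u);
  return 0;
}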
std::unique_ptr<ScopedArenaAllocator> temp_scoped_alloc_; @@ -1280,13 +1267,13 @@ class MIRGraph { ArenaBitVector* temp_bit_vector_; std::unique_ptr<GlobalValueNumbering> temp_gvn_; static const int kInvalidEntry = -1; - GrowableArray<BasicBlock*> block_list_; + ArenaVector<BasicBlock*> block_list_; ArenaBitVector* try_block_addr_; BasicBlock* entry_block_; BasicBlock* exit_block_; unsigned int num_blocks_; const DexFile::CodeItem* current_code_item_; - GrowableArray<uint16_t> dex_pc_to_block_map_; // FindBlock lookup cache. + ArenaVector<uint16_t> dex_pc_to_block_map_; // FindBlock lookup cache. ArenaVector<DexCompilationUnit*> m_units_; // List of methods included in this graph typedef std::pair<int, int> MIRLocation; // Insert point, (m_unit_ index, offset) ArenaVector<MIRLocation> method_stack_; // Include stack @@ -1310,11 +1297,11 @@ class MIRGraph { bool compiler_temps_committed_; // Keeps track whether number of temps has been frozen (for example post frame size calculation). bool punt_to_interpreter_; // Difficult or not worthwhile - just interpret. uint64_t merged_df_flags_; - GrowableArray<MirIFieldLoweringInfo> ifield_lowering_infos_; - GrowableArray<MirSFieldLoweringInfo> sfield_lowering_infos_; - GrowableArray<MirMethodLoweringInfo> method_lowering_infos_; + ArenaVector<MirIFieldLoweringInfo> ifield_lowering_infos_; + ArenaVector<MirSFieldLoweringInfo> sfield_lowering_infos_; + ArenaVector<MirMethodLoweringInfo> method_lowering_infos_; static const uint64_t oat_data_flow_attributes_[kMirOpLast]; - GrowableArray<BasicBlock*> gen_suspend_test_list_; // List of blocks containing suspend tests + ArenaVector<BasicBlock*> gen_suspend_test_list_; // List of blocks containing suspend tests friend class ClassInitCheckEliminationTest; friend class GlobalValueNumberingTest; diff --git a/compiler/dex/mir_graph_test.cc b/compiler/dex/mir_graph_test.cc index bdc05a968e..a96cd84297 100644 --- a/compiler/dex/mir_graph_test.cc +++ b/compiler/dex/mir_graph_test.cc @@ -57,56 +57,48 @@ class TopologicalSortOrderTest : public testing::Test { void DoPrepareBasicBlocks(const BBDef* defs, size_t count) { cu_.mir_graph->block_id_map_.clear(); - cu_.mir_graph->block_list_.Reset(); + cu_.mir_graph->block_list_.clear(); ASSERT_LT(3u, count); // null, entry, exit and at least one bytecode block. ASSERT_EQ(kNullBlock, defs[0].type); ASSERT_EQ(kEntryBlock, defs[1].type); ASSERT_EQ(kExitBlock, defs[2].type); for (size_t i = 0u; i != count; ++i) { const BBDef* def = &defs[i]; - BasicBlock* bb = cu_.mir_graph->NewMemBB(def->type, i); - cu_.mir_graph->block_list_.Insert(bb); + BasicBlock* bb = cu_.mir_graph->CreateNewBB(def->type); if (def->num_successors <= 2) { bb->successor_block_list_type = kNotUsed; - bb->successor_blocks = nullptr; bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u; bb->taken = (def->num_successors >= 2) ? def->successors[1] : 0u; } else { bb->successor_block_list_type = kPackedSwitch; bb->fall_through = 0u; bb->taken = 0u; - bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>( - &cu_.arena, def->num_successors, kGrowableArraySuccessorBlocks); + bb->successor_blocks.reserve(def->num_successors); for (size_t j = 0u; j != def->num_successors; ++j) { SuccessorBlockInfo* successor_block_info = static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor)); successor_block_info->block = j; successor_block_info->key = 0u; // Not used by class init check elimination. 
- bb->successor_blocks->Insert(successor_block_info); + bb->successor_blocks.push_back(successor_block_info); } } - bb->predecessors = new (&cu_.arena) GrowableArray<BasicBlockId>( - &cu_.arena, def->num_predecessors, kGrowableArrayPredecessors); - for (size_t j = 0u; j != def->num_predecessors; ++j) { - ASSERT_NE(0u, def->predecessors[j]); - bb->predecessors->Insert(def->predecessors[j]); - } + bb->predecessors.assign(def->predecessors, def->predecessors + def->num_predecessors); if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) { bb->data_flow_info = static_cast<BasicBlockDataFlow*>( cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo)); } } cu_.mir_graph->num_blocks_ = count; - ASSERT_EQ(count, cu_.mir_graph->block_list_.Size()); - cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_.Get(1); + ASSERT_EQ(count, cu_.mir_graph->block_list_.size()); + cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1]; ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type); - cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_.Get(2); + cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_[2]; ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type); DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>(cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc)); - cu_.mir_graph->current_code_item_ = cu_.code_item = code_item; + cu_.mir_graph->current_code_item_ = code_item; } template <size_t count> @@ -120,21 +112,21 @@ class TopologicalSortOrderTest : public testing::Test { cu_.mir_graph->ComputeDominators(); cu_.mir_graph->ComputeTopologicalSortOrder(); cu_.mir_graph->SSATransformationEnd(); - ASSERT_NE(cu_.mir_graph->topological_order_, nullptr); - ASSERT_NE(cu_.mir_graph->topological_order_loop_ends_, nullptr); - ASSERT_NE(cu_.mir_graph->topological_order_indexes_, nullptr); - ASSERT_EQ(cu_.mir_graph->GetNumBlocks(), cu_.mir_graph->topological_order_indexes_->Size()); - for (size_t i = 0, size = cu_.mir_graph->GetTopologicalSortOrder()->Size(); i != size; ++i) { - ASSERT_LT(cu_.mir_graph->topological_order_->Get(i), cu_.mir_graph->GetNumBlocks()); - BasicBlockId id = cu_.mir_graph->topological_order_->Get(i); - EXPECT_EQ(i, cu_.mir_graph->topological_order_indexes_->Get(id)); + ASSERT_FALSE(cu_.mir_graph->topological_order_.empty()); + ASSERT_FALSE(cu_.mir_graph->topological_order_loop_ends_.empty()); + ASSERT_FALSE(cu_.mir_graph->topological_order_indexes_.empty()); + ASSERT_EQ(cu_.mir_graph->GetNumBlocks(), cu_.mir_graph->topological_order_indexes_.size()); + for (size_t i = 0, size = cu_.mir_graph->GetTopologicalSortOrder().size(); i != size; ++i) { + ASSERT_LT(cu_.mir_graph->topological_order_[i], cu_.mir_graph->GetNumBlocks()); + BasicBlockId id = cu_.mir_graph->topological_order_[i]; + EXPECT_EQ(i, cu_.mir_graph->topological_order_indexes_[id]); } } void DoCheckOrder(const BasicBlockId* ids, size_t count) { - ASSERT_EQ(count, cu_.mir_graph->GetTopologicalSortOrder()->Size()); + ASSERT_EQ(count, cu_.mir_graph->GetTopologicalSortOrder().size()); for (size_t i = 0; i != count; ++i) { - EXPECT_EQ(ids[i], cu_.mir_graph->GetTopologicalSortOrder()->Get(i)) << i; + EXPECT_EQ(ids[i], cu_.mir_graph->GetTopologicalSortOrder()[i]) << i; } } @@ -145,8 +137,8 @@ class TopologicalSortOrderTest : public testing::Test { void DoCheckLoopEnds(const uint16_t* ends, size_t count) { for (size_t i = 0; i != count; ++i) { - ASSERT_LT(i, cu_.mir_graph->GetTopologicalSortOrderLoopEnds()->Size()); - EXPECT_EQ(ends[i], 
cu_.mir_graph->GetTopologicalSortOrderLoopEnds()->Get(i)) << i; + ASSERT_LT(i, cu_.mir_graph->GetTopologicalSortOrderLoopEnds().size()); + EXPECT_EQ(ends[i], cu_.mir_graph->GetTopologicalSortOrderLoopEnds()[i]) << i; } } diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index fdabc3e3cb..dac71f6aa7 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -28,7 +28,7 @@ namespace art { static unsigned int Predecessors(BasicBlock* bb) { - return bb->predecessors->Size(); + return bb->predecessors.size(); } /* Setup a constant value for opcodes thare have the DF_SETS_CONST attribute */ @@ -230,7 +230,8 @@ COMPILE_ASSERT(ConditionCodeForIfCcZ(Instruction::IF_GTZ) == kCondGt, check_if_g COMPILE_ASSERT(ConditionCodeForIfCcZ(Instruction::IF_LEZ) == kCondLe, check_if_lez_ccode); int MIRGraph::GetSSAUseCount(int s_reg) { - return raw_use_counts_.Get(s_reg); + DCHECK_LT(static_cast<size_t>(s_reg), ssa_subscripts_.size()); + return raw_use_counts_[s_reg]; } size_t MIRGraph::GetNumBytesForSpecialTemps() const { @@ -697,7 +698,8 @@ bool MIRGraph::LayoutBlocks(BasicBlock* bb) { if ((walker->block_type == kEntryBlock) || (Predecessors(walker) != 1)) { break; } - BasicBlock* prev = GetBasicBlock(walker->predecessors->Get(0)); + DCHECK(!walker->predecessors.empty()); + BasicBlock* prev = GetBasicBlock(walker->predecessors[0]); // If we visited the predecessor, we are done. if (prev->visited) { @@ -784,7 +786,7 @@ void MIRGraph::CombineBlocks(struct BasicBlock* bb) { *bb->last_mir_insn = *throw_insn; // Use the successor info from the next block bb->successor_block_list_type = bb_next->successor_block_list_type; - bb->successor_blocks = bb_next->successor_blocks; + bb->successor_blocks.swap(bb_next->successor_blocks); // Swap instead of copying. // Use the ending block linkage from the next block bb->fall_through = bb_next->fall_through; GetBasicBlock(bb->taken)->block_type = kDead; // Kill the unused exception block @@ -856,8 +858,8 @@ bool MIRGraph::EliminateNullChecksAndInferTypes(BasicBlock* bb) { int this_reg = GetFirstInVR(); ssa_regs_to_check->ClearBit(this_reg); } - } else if (bb->predecessors->Size() == 1) { - BasicBlock* pred_bb = GetBasicBlock(bb->predecessors->Get(0)); + } else if (bb->predecessors.size() == 1) { + BasicBlock* pred_bb = GetBasicBlock(bb->predecessors[0]); // pred_bb must have already been processed at least once. DCHECK(pred_bb->data_flow_info->ending_check_v != nullptr); ssa_regs_to_check->Copy(pred_bb->data_flow_info->ending_check_v); @@ -883,25 +885,22 @@ bool MIRGraph::EliminateNullChecksAndInferTypes(BasicBlock* bb) { } } else { // Starting state is union of all incoming arcs - GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors); - BasicBlock* pred_bb = GetBasicBlock(iter.Next()); - CHECK(pred_bb != NULL); - while (pred_bb->data_flow_info->ending_check_v == nullptr) { - pred_bb = GetBasicBlock(iter.Next()); - // At least one predecessor must have been processed before this bb. 
+ bool copied_first = false; + for (BasicBlockId pred_id : bb->predecessors) { + BasicBlock* pred_bb = GetBasicBlock(pred_id); DCHECK(pred_bb != nullptr); DCHECK(pred_bb->data_flow_info != nullptr); - } - ssa_regs_to_check->Copy(pred_bb->data_flow_info->ending_check_v); - while (true) { - pred_bb = GetBasicBlock(iter.Next()); - if (!pred_bb) break; - DCHECK(pred_bb->data_flow_info != nullptr); if (pred_bb->data_flow_info->ending_check_v == nullptr) { continue; } - ssa_regs_to_check->Union(pred_bb->data_flow_info->ending_check_v); + if (!copied_first) { + copied_first = true; + ssa_regs_to_check->Copy(pred_bb->data_flow_info->ending_check_v); + } else { + ssa_regs_to_check->Union(pred_bb->data_flow_info->ending_check_v); + } } + DCHECK(copied_first); // At least one predecessor must have been processed before this bb. } // At this point, ssa_regs_to_check shows which sregs have an object definition with // no intervening uses. @@ -1067,7 +1066,7 @@ bool MIRGraph::EliminateClassInitChecksGate() { temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack)); // Each insn we use here has at least 2 code units, offset/2 will be a unique index. - const size_t end = (cu_->code_item->insns_size_in_code_units_ + 1u) / 2u; + const size_t end = (GetNumDalvikInsns() + 1u) / 2u; temp_insn_data_ = static_cast<uint16_t*>( temp_scoped_alloc_->Alloc(end * sizeof(*temp_insn_data_), kArenaAllocGrowableArray)); @@ -1156,35 +1155,31 @@ bool MIRGraph::EliminateClassInitChecks(BasicBlock* bb) { DCHECK(classes_to_check != nullptr); if (bb->block_type == kEntryBlock) { classes_to_check->SetInitialBits(temp_bit_vector_size_); - } else if (bb->predecessors->Size() == 1) { - BasicBlock* pred_bb = GetBasicBlock(bb->predecessors->Get(0)); + } else if (bb->predecessors.size() == 1) { + BasicBlock* pred_bb = GetBasicBlock(bb->predecessors[0]); // pred_bb must have already been processed at least once. DCHECK(pred_bb != nullptr); DCHECK(pred_bb->data_flow_info != nullptr); DCHECK(pred_bb->data_flow_info->ending_check_v != nullptr); classes_to_check->Copy(pred_bb->data_flow_info->ending_check_v); } else { - // Starting state is union of all incoming arcs - GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors); - BasicBlock* pred_bb = GetBasicBlock(iter.Next()); - DCHECK(pred_bb != NULL); - DCHECK(pred_bb->data_flow_info != NULL); - while (pred_bb->data_flow_info->ending_check_v == nullptr) { - pred_bb = GetBasicBlock(iter.Next()); - // At least one predecessor must have been processed before this bb. + // Starting state is union of all incoming arcs. + bool copied_first = false; + for (BasicBlockId pred_id : bb->predecessors) { + BasicBlock* pred_bb = GetBasicBlock(pred_id); DCHECK(pred_bb != nullptr); DCHECK(pred_bb->data_flow_info != nullptr); - } - classes_to_check->Copy(pred_bb->data_flow_info->ending_check_v); - while (true) { - pred_bb = GetBasicBlock(iter.Next()); - if (!pred_bb) break; - DCHECK(pred_bb->data_flow_info != nullptr); if (pred_bb->data_flow_info->ending_check_v == nullptr) { continue; } - classes_to_check->Union(pred_bb->data_flow_info->ending_check_v); + if (!copied_first) { + copied_first = true; + classes_to_check->Copy(pred_bb->data_flow_info->ending_check_v); + } else { + classes_to_check->Union(pred_bb->data_flow_info->ending_check_v); + } } + DCHECK(copied_first); // At least one predecessor must have been processed before this bb. } // At this point, classes_to_check shows which classes need clinit checks. 
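Both dataflow passes above replace the iterator-driven "find the first processed predecessor, then union the rest" loops with a single range-for over bb->predecessors guarded by a copied_first flag: the first predecessor that already has an ending_check_v is copied, every later one is unioned in, and unprocessed predecessors are skipped. A self-contained sketch of the same merge pattern, using std::bitset in place of ArenaBitVector and null pointers to model not-yet-processed predecessors:

#include <bitset>
#include <cassert>
#include <vector>

constexpr size_t kNumRegs = 8;
using BitVec = std::bitset<kNumRegs>;  // Stand-in for ArenaBitVector.

// Merge the ending state of every already-processed predecessor into 'out'.
// A null entry models a predecessor whose ending_check_v is still nullptr.
// Returns false if no predecessor has been processed yet (the real code DCHECKs this).
bool MergePredecessorStates(const std::vector<const BitVec*>& preds, BitVec* out) {
  bool copied_first = false;
  for (const BitVec* pred_state : preds) {
    if (pred_state == nullptr) {
      continue;  // Not processed yet; skip it, as the loops above do.
    }
    if (!copied_first) {
      copied_first = true;
      *out = *pred_state;   // Copy() in the real code.
    } else {
      *out |= *pred_state;  // Union() in the real code.
    }
  }
  return copied_first;
}

int main() {
  BitVec a("00001111"), b("10101010");
  std::vector<const BitVec*> preds = {nullptr, &a, &b};
  BitVec merged;
  bool ok = MergePredecessorStates(preds, &merged);
  assert(ok && merged == (a | b));
  return 0;
}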
@@ -1312,8 +1307,8 @@ void MIRGraph::ComputeInlineIFieldLoweringInfo(uint16_t field_idx, MIR* invoke, MirIFieldLoweringInfo::Resolve(cu_->compiler_driver, &inlined_unit, &inlined_field_info, 1u); DCHECK(inlined_field_info.IsResolved()); - uint32_t field_info_index = ifield_lowering_infos_.Size(); - ifield_lowering_infos_.Insert(inlined_field_info); + uint32_t field_info_index = ifield_lowering_infos_.size(); + ifield_lowering_infos_.push_back(inlined_field_info); temp_bit_vector_->SetBit(method_index); temp_insn_data_[method_index] = field_info_index; iget_or_iput->meta.ifield_lowering_info = field_info_index; @@ -1321,7 +1316,7 @@ void MIRGraph::ComputeInlineIFieldLoweringInfo(uint16_t field_idx, MIR* invoke, bool MIRGraph::InlineSpecialMethodsGate() { if ((cu_->disable_opt & (1 << kSuppressMethodInlining)) != 0 || - method_lowering_infos_.Size() == 0u) { + method_lowering_infos_.size() == 0u) { return false; } if (cu_->compiler_driver->GetMethodInlinerMap() == nullptr) { @@ -1337,7 +1332,7 @@ void MIRGraph::InlineSpecialMethodsStart() { DCHECK(temp_scoped_alloc_.get() == nullptr); temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack)); - temp_bit_vector_size_ = method_lowering_infos_.Size(); + temp_bit_vector_size_ = method_lowering_infos_.size(); temp_bit_vector_ = new (temp_scoped_alloc_.get()) ArenaBitVector( temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapMisc); temp_bit_vector_->ClearAllBits(); diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index c510b528ff..55e547e56f 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -76,8 +76,8 @@ class ClassInitCheckEliminationTest : public testing::Test { { opcode, bb, field_info } void DoPrepareSFields(const SFieldDef* defs, size_t count) { - cu_.mir_graph->sfield_lowering_infos_.Reset(); - cu_.mir_graph->sfield_lowering_infos_.Resize(count); + cu_.mir_graph->sfield_lowering_infos_.clear(); + cu_.mir_graph->sfield_lowering_infos_.reserve(count); for (size_t i = 0u; i != count; ++i) { const SFieldDef* def = &defs[i]; MirSFieldLoweringInfo field_info(def->field_idx); @@ -89,7 +89,7 @@ class ClassInitCheckEliminationTest : public testing::Test { } ASSERT_EQ(def->declaring_dex_file != 0u, field_info.IsResolved()); ASSERT_FALSE(field_info.IsInitialized()); - cu_.mir_graph->sfield_lowering_infos_.Insert(field_info); + cu_.mir_graph->sfield_lowering_infos_.push_back(field_info); } } @@ -100,51 +100,43 @@ class ClassInitCheckEliminationTest : public testing::Test { void DoPrepareBasicBlocks(const BBDef* defs, size_t count) { cu_.mir_graph->block_id_map_.clear(); - cu_.mir_graph->block_list_.Reset(); + cu_.mir_graph->block_list_.clear(); ASSERT_LT(3u, count); // null, entry, exit and at least one bytecode block. ASSERT_EQ(kNullBlock, defs[0].type); ASSERT_EQ(kEntryBlock, defs[1].type); ASSERT_EQ(kExitBlock, defs[2].type); for (size_t i = 0u; i != count; ++i) { const BBDef* def = &defs[i]; - BasicBlock* bb = cu_.mir_graph->NewMemBB(def->type, i); - cu_.mir_graph->block_list_.Insert(bb); + BasicBlock* bb = cu_.mir_graph->CreateNewBB(def->type); if (def->num_successors <= 2) { bb->successor_block_list_type = kNotUsed; - bb->successor_blocks = nullptr; bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u; bb->taken = (def->num_successors >= 2) ? 
def->successors[1] : 0u; } else { bb->successor_block_list_type = kPackedSwitch; bb->fall_through = 0u; bb->taken = 0u; - bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>( - &cu_.arena, def->num_successors, kGrowableArraySuccessorBlocks); + bb->successor_blocks.reserve(def->num_successors); for (size_t j = 0u; j != def->num_successors; ++j) { SuccessorBlockInfo* successor_block_info = static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor)); successor_block_info->block = j; successor_block_info->key = 0u; // Not used by class init check elimination. - bb->successor_blocks->Insert(successor_block_info); + bb->successor_blocks.push_back(successor_block_info); } } - bb->predecessors = new (&cu_.arena) GrowableArray<BasicBlockId>( - &cu_.arena, def->num_predecessors, kGrowableArrayPredecessors); - for (size_t j = 0u; j != def->num_predecessors; ++j) { - ASSERT_NE(0u, def->predecessors[j]); - bb->predecessors->Insert(def->predecessors[j]); - } + bb->predecessors.assign(def->predecessors, def->predecessors + def->num_predecessors); if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) { bb->data_flow_info = static_cast<BasicBlockDataFlow*>( cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo)); } } cu_.mir_graph->num_blocks_ = count; - ASSERT_EQ(count, cu_.mir_graph->block_list_.Size()); - cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_.Get(1); + ASSERT_EQ(count, cu_.mir_graph->block_list_.size()); + cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1]; ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type); - cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_.Get(2); + cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_[2]; ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type); } @@ -161,11 +153,11 @@ class ClassInitCheckEliminationTest : public testing::Test { const MIRDef* def = &defs[i]; MIR* mir = &mirs_[i]; mir->dalvikInsn.opcode = def->opcode; - ASSERT_LT(def->bbid, cu_.mir_graph->block_list_.Size()); - BasicBlock* bb = cu_.mir_graph->block_list_.Get(def->bbid); + ASSERT_LT(def->bbid, cu_.mir_graph->block_list_.size()); + BasicBlock* bb = cu_.mir_graph->block_list_[def->bbid]; bb->AppendMIR(mir); if (def->opcode >= Instruction::SGET && def->opcode <= Instruction::SPUT_SHORT) { - ASSERT_LT(def->field_or_method_info, cu_.mir_graph->sfield_lowering_infos_.Size()); + ASSERT_LT(def->field_or_method_info, cu_.mir_graph->sfield_lowering_infos_.size()); mir->meta.sfield_lowering_info = def->field_or_method_info; } mir->ssa_rep = nullptr; @@ -179,7 +171,7 @@ class ClassInitCheckEliminationTest : public testing::Test { cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc)); memset(code_item_, 0, sizeof(DexFile::CodeItem)); code_item_->insns_size_in_code_units_ = 2u * count; - cu_.mir_graph->current_code_item_ = cu_.code_item = code_item_; + cu_.mir_graph->current_code_item_ = code_item_; } template <size_t count> @@ -408,12 +400,10 @@ TEST_F(ClassInitCheckEliminationTest, Catch) { // Add successor block info to the check block. 
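In the test fixtures above, the per-element predecessors->Insert() loop is replaced by a single assign() over the raw BBDef array. A minimal stand-alone illustration of assign() from a pointer range, with plain ints standing in for BasicBlockId:

#include <cassert>
#include <vector>

int main() {
  const int defs[] = {1, 4, 5};       // Like BBDef::predecessors.
  const size_t num_predecessors = 3;  // Like BBDef::num_predecessors.
  std::vector<int> predecessors;
  // Replaces the old loop of predecessors->Insert(defs[j]) calls.
  predecessors.assign(defs, defs + num_predecessors);
  assert(predecessors.size() == num_predecessors && predecessors[1] == 4);
  return 0;
}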
BasicBlock* check_bb = cu_.mir_graph->GetBasicBlock(3u); check_bb->successor_block_list_type = kCatch; - check_bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>( - &cu_.arena, 2, kGrowableArraySuccessorBlocks); SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*> (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor)); successor_block_info->block = catch_handler->id; - check_bb->successor_blocks->Insert(successor_block_info); + check_bb->successor_blocks.push_back(successor_block_info); PrepareMIRs(mirs); PerformClassInitCheckElimination(); ASSERT_EQ(arraysize(expected_ignore_clinit_check), mir_count_); diff --git a/compiler/dex/portable/mir_to_gbc.cc b/compiler/dex/portable/mir_to_gbc.cc index b0b06065b1..ba255e0a76 100644 --- a/compiler/dex/portable/mir_to_gbc.cc +++ b/compiler/dex/portable/mir_to_gbc.cc @@ -64,7 +64,7 @@ LLVMInfo::~LLVMInfo() { } ::llvm::Value* MirConverter::GetLLVMValue(int s_reg) { - return llvm_values_.Get(s_reg); + return llvm_values_[s_reg]; } void MirConverter::SetVregOnValue(::llvm::Value* val, int s_reg) { @@ -87,7 +87,7 @@ void MirConverter::DefineValueOnly(::llvm::Value* val, int s_reg) { } placeholder->replaceAllUsesWith(val); val->takeName(placeholder); - llvm_values_.Put(s_reg, val); + llvm_values_[s_reg] = val; ::llvm::Instruction* inst = ::llvm::dyn_cast< ::llvm::Instruction>(placeholder); DCHECK(inst != NULL); inst->eraseFromParent(); @@ -1740,15 +1740,12 @@ bool MirConverter::BlockBitcodeConversion(BasicBlock* bb) { art::llvm::IntrinsicHelper::CatchTargets); ::llvm::Value* switch_key = irb_->CreateCall(intr, irb_->getInt32(mir->offset)); - GrowableArray<SuccessorBlockInfo*>::Iterator iter(bb->successor_blocks); // New basic block to use for work half ::llvm::BasicBlock* work_bb = ::llvm::BasicBlock::Create(*context_, "", func_); ::llvm::SwitchInst* sw = - irb_->CreateSwitch(switch_key, work_bb, bb->successor_blocks->Size()); - while (true) { - SuccessorBlockInfo *successor_block_info = iter.Next(); - if (successor_block_info == NULL) break; + irb_->CreateSwitch(switch_key, work_bb, bb->successor_blocks.size()); + for (SuccessorBlockInfo *successor_block_info : bb->successor_blocks) { ::llvm::BasicBlock *target = GetLLVMBlock(successor_block_info->block); int type_index = successor_block_info->key; @@ -1908,18 +1905,18 @@ void MirConverter::MethodMIR2Bitcode() { ::llvm::Value* val; RegLocation rl_temp = mir_graph_->reg_location_[i]; if ((mir_graph_->SRegToVReg(i) < 0) || rl_temp.high_word) { - llvm_values_.Insert(0); + llvm_values_.push_back(0); } else if ((i < mir_graph_->GetFirstInVR()) || (i >= (mir_graph_->GetFirstTempVR()))) { ::llvm::Constant* imm_value = mir_graph_->reg_location_[i].wide ? 
irb_->getJLong(0) : irb_->getJInt(0); val = EmitConst(imm_value, mir_graph_->reg_location_[i]); val->setName(mir_graph_->GetSSAName(i)); - llvm_values_.Insert(val); + llvm_values_.push_back(val); } else { // Recover previously-created argument values ::llvm::Value* arg_val = arg_iter++; - llvm_values_.Insert(arg_val); + llvm_values_.push_back(arg_val); } } @@ -1966,7 +1963,7 @@ void MirConverter::MethodMIR2Bitcode() { if (::llvm::verifyFunction(*func_, ::llvm::PrintMessageAction)) { LOG(INFO) << "Bitcode verification FAILED for " << PrettyMethod(cu_->method_idx, *cu_->dex_file) - << " of size " << cu_->code_item->insns_size_in_code_units_; + << " of size " << mir_graph_->GetNumDalvikInsns(); cu_->enable_debug |= (1 << kDebugDumpBitcodeFile); } } diff --git a/compiler/dex/portable/mir_to_gbc.h b/compiler/dex/portable/mir_to_gbc.h index e6dee5dc0a..94ae3f7e5f 100644 --- a/compiler/dex/portable/mir_to_gbc.h +++ b/compiler/dex/portable/mir_to_gbc.h @@ -31,6 +31,7 @@ #include "llvm/intrinsic_helper.h" #include "llvm/llvm_compilation_unit.h" #include "safe_map.h" +#include "utils/arena_containers.h" namespace llvm { class Module; @@ -104,9 +105,10 @@ class MirConverter : public Backend { placeholder_bb_(NULL), entry_bb_(NULL), entry_target_bb_(NULL), - llvm_values_(arena, mir_graph->GetNumSSARegs()), + llvm_values_(arena->Adapter()), temp_name_(0), current_dalvik_offset_(0) { + llvm_values_.reserve(mir_graph->GetNumSSARegs()); if (kIsDebugBuild) { cu->enable_debug |= (1 << kDebugVerifyBitcode); } @@ -228,7 +230,7 @@ class MirConverter : public Backend { ::llvm::BasicBlock* entry_bb_; ::llvm::BasicBlock* entry_target_bb_; std::string bitcode_filename_; - GrowableArray< ::llvm::Value*> llvm_values_; + ArenaVector< ::llvm::Value*> llvm_values_; int32_t temp_name_; SafeMap<int32_t, ::llvm::BasicBlock*> id_to_block_map_; // block id -> llvm bb. int current_dalvik_offset_; diff --git a/compiler/dex/post_opt_passes.cc b/compiler/dex/post_opt_passes.cc index b3d5c8a032..675dbcf91d 100644 --- a/compiler/dex/post_opt_passes.cc +++ b/compiler/dex/post_opt_passes.cc @@ -84,7 +84,7 @@ void CalculatePredecessors::Start(PassDataHolder* data) const { // First clear all predecessors. AllNodesIterator first(mir_graph); for (BasicBlock* bb = first.Next(); bb != nullptr; bb = first.Next()) { - bb->predecessors->Reset(); + bb->predecessors.clear(); } // Now calculate all predecessors. @@ -100,7 +100,7 @@ void CalculatePredecessors::Start(PassDataHolder* data) const { // Now iterate through the children to set the predecessor bits. for (BasicBlock* child = child_iter.Next(); child != nullptr; child = child_iter.Next()) { - child->predecessors->Insert(bb->id); + child->predecessors.push_back(bb->id); } } } diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h index b95789ea54..d935bc30c4 100644 --- a/compiler/dex/quick/arm/arm_lir.h +++ b/compiler/dex/quick/arm/arm_lir.h @@ -528,6 +528,7 @@ enum ArmOpcode { kThumb2Vldms, // vldms rd, <list>. kThumb2Vstms, // vstms rd, <list>. kThumb2BUncond, // b <label>. + kThumb2Bl, // bl with linker fixup. [11110] S imm10 [11] J1 [1] J2 imm11. kThumb2MovImm16H, // similar to kThumb2MovImm16, but target high hw. kThumb2AddPCR, // Thumb2 2-operand add with hard-coded PC target. kThumb2Adr, // Special purpose encoding of ADR for switch tables. 
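The new kThumb2Bl opcode above backs the relative-call path added in the call_arm.cc and target_arm.cc hunks that follow: the backend emits a BL with a placeholder displacement, remembers the LIR in call_method_insns_, and InstallLiteralPools() later turns each recorded call into a LinkerPatch so the linker can fill in the real offset (or insert a thunk if the target is out of range). A minimal sketch of that record-then-patch flow, with plain structs standing in for LIR and LinkerPatch::RelativeCodePatch (the names here are illustrative, not the ART API):

#include <cstdint>
#include <iostream>
#include <vector>

// Simplified stand-ins for a recorded call LIR and for a relative code patch.
struct CallInsn {
  uint32_t offset;             // Code offset of the BL instruction.
  uint32_t target_method_idx;  // Which method it calls; displacement left as 0 for now.
};
struct RelativeCodePatch {
  uint32_t literal_offset;
  uint32_t target_method_idx;
};

int main() {
  std::vector<CallInsn> call_method_insns;

  // Code generation: emit relative calls with a placeholder displacement and record them.
  call_method_insns.push_back({0x20u, 7u});
  call_method_insns.push_back({0x58u, 9u});

  // "InstallLiteralPools" time: convert every recorded call into a patch for the linker.
  std::vector<RelativeCodePatch> patches;
  patches.reserve(call_method_insns.size());
  for (const CallInsn& call : call_method_insns) {
    patches.push_back({call.offset, call.target_method_idx});
  }

  for (const RelativeCodePatch& p : patches) {
    std::cout << "patch BL at +0x" << std::hex << p.literal_offset
              << " -> method " << std::dec << p.target_method_idx << "\n";
  }
  return 0;
}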
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index dcec861e22..cf34948969 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -968,6 +968,10 @@ const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = { kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH, "b", "!0t", 4, kFixupT2Branch), + ENCODING_MAP(kThumb2Bl, 0xf000d000, + kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP, + "bl", "!0T", 4, kFixupLabel), ENCODING_MAP(kThumb2MovImm16H, 0xf2c00000, kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0, diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index fc98d31dd4..b721e02e9a 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -20,6 +20,8 @@ #include "codegen_arm.h" #include "dex/quick/mir_to_lir-inl.h" #include "gc/accounting/card_table.h" +#include "mirror/art_method.h" +#include "mirror/object_array-inl.h" #include "entrypoints/quick/quick_entrypoints.h" namespace art { @@ -55,7 +57,7 @@ void ArmMir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocati tab_rec->vaddr = current_dalvik_offset_; uint32_t size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); - switch_tables_.Insert(tab_rec); + switch_tables_.push_back(tab_rec); // Get the switch value rl_src = LoadValue(rl_src, kCoreReg); @@ -104,7 +106,7 @@ void ArmMir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocati uint32_t size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); - switch_tables_.Insert(tab_rec); + switch_tables_.push_back(tab_rec); // Get the switch value rl_src = LoadValue(rl_src, kCoreReg); @@ -158,7 +160,7 @@ void ArmMir2Lir::GenFillArrayData(MIR* mir, DexOffset table_offset, RegLocation uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16); tab_rec->size = (size * width) + 8; - fill_array_data_.Insert(tab_rec); + fill_array_data_.push_back(tab_rec); // Making a call - use explicit registers FlushAllRegs(); /* Everything to home location */ @@ -499,4 +501,117 @@ void ArmMir2Lir::GenSpecialExitSequence() { NewLIR1(kThumbBx, rs_rARM_LR.GetReg()); } +static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { + // Emit relative calls only within a dex file due to the limited range of the BL insn. + return cu->dex_file == target_method.dex_file; +} + +/* + * Bit of a hack here - in the absence of a real scheduling pass, + * emit the next instruction in static & direct invoke sequences. + */ +static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const MethodReference& target_method, + uint32_t unused, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type) { + Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); + if (direct_code != 0 && direct_method != 0) { + switch (state) { + case 0: // Get the current Method* [sets kArg0] + if (direct_code != static_cast<uintptr_t>(-1)) { + cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + } else if (ArmUseRelativeCall(cu, target_method)) { + // Defer to linker patch. 
+ } else { + cg->LoadCodeAddress(target_method, type, kInvokeTgt); + } + if (direct_method != static_cast<uintptr_t>(-1)) { + cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method); + } else { + cg->LoadMethodAddress(target_method, type, kArg0); + } + break; + default: + return -1; + } + } else { + RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); + switch (state) { + case 0: // Get the current Method* [sets kArg0] + // TUNING: we can save a reg copy if Method* has been promoted. + cg->LoadCurrMethodDirect(arg0_ref); + break; + case 1: // Get method->dex_cache_resolved_methods_ + cg->LoadRefDisp(arg0_ref, + mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), + arg0_ref, + kNotVolatile); + // Set up direct code if known. + if (direct_code != 0) { + if (direct_code != static_cast<uintptr_t>(-1)) { + cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + } else if (ArmUseRelativeCall(cu, target_method)) { + // Defer to linker patch. + } else { + CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds()); + cg->LoadCodeAddress(target_method, type, kInvokeTgt); + } + } + break; + case 2: // Grab target method* + CHECK_EQ(cu->dex_file, target_method.dex_file); + cg->LoadRefDisp(arg0_ref, + mirror::ObjectArray<mirror::Object>::OffsetOfElement( + target_method.dex_method_index).Int32Value(), + arg0_ref, + kNotVolatile); + break; + case 3: // Grab the code from the method* + if (direct_code == 0) { + // kInvokeTgt := arg0_ref->entrypoint + cg->LoadWordDisp(arg0_ref, + mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(), + cg->TargetPtrReg(kInvokeTgt)); + } + break; + default: + return -1; + } + } + return state + 1; +} + +NextCallInsn ArmMir2Lir::GetNextSDCallInsn() { + return ArmNextSDCallInsn; +} + +LIR* ArmMir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) { + // For ARM, just generate a relative BL instruction that will be filled in at 'link time'. + // If the target turns out to be too far, the linker will generate a thunk for dispatch. + int target_method_idx = target_method.dex_method_index; + const DexFile* target_dex_file = target_method.dex_file; + + // Generate the call instruction and save index, dex_file, and type. + // NOTE: Method deduplication takes linker patches into account, so we can just pass 0 + // as a placeholder for the offset. 
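ArmNextSDCallInsn above follows the NextCallInsn contract: the shared invoke-lowering code calls it repeatedly with an increasing state, it emits one instruction of the Method*/entry-point load sequence per call, and it returns -1 once the sequence is finished. A tiny sketch of that driving loop under those assumptions, with the per-state work reduced to printing (the real signature takes the CompilationUnit, CallInfo, and method reference shown above):

#include <iostream>

// One step of a static/direct invoke sequence; returns the next state or -1 when done.
int NextSDCallInsnSketch(int state) {
  switch (state) {
    case 0: std::cout << "load current Method* into arg0\n"; break;
    case 1: std::cout << "load dex_cache_resolved_methods_\n"; break;
    case 2: std::cout << "load target method from the dex cache array\n"; break;
    case 3: std::cout << "load entry point from the target method\n"; break;
    default: return -1;
  }
  return state + 1;
}

int main() {
  // The invoke lowering drives the state machine until it reports completion.
  for (int state = 0; state != -1; state = NextSDCallInsnSketch(state)) {
  }
  return 0;
}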
+ LIR* call = RawLIR(current_dalvik_offset_, kThumb2Bl, 0, + target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type); + AppendLIR(call); + call_method_insns_.push_back(call); + return call; +} + +LIR* ArmMir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info) { + LIR* call_insn; + if (method_info.FastPath() && ArmUseRelativeCall(cu_, method_info.GetTargetMethod()) && + (method_info.GetSharpType() == kDirect || method_info.GetSharpType() == kStatic) && + method_info.DirectCode() == static_cast<uintptr_t>(-1)) { + call_insn = CallWithLinkerFixup(method_info.GetTargetMethod(), method_info.GetSharpType()); + } else { + call_insn = OpReg(kOpBlx, TargetPtrReg(kInvokeTgt)); + } + return call_insn; +} + } // namespace art diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 84881e0860..932dd87923 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -20,6 +20,7 @@ #include "arm_lir.h" #include "dex/compiler_internals.h" #include "dex/quick/mir_to_lir.h" +#include "utils/arena_containers.h" namespace art { @@ -185,6 +186,28 @@ class ArmMir2Lir FINAL : public Mir2Lir { return false; // Wide FPRs are formed by pairing. } + NextCallInsn GetNextSDCallInsn() OVERRIDE; + + /* + * @brief Generate a relative call to the method that will be patched at link time. + * @param target_method The MethodReference of the method to be invoked. + * @param type How the method will be invoked. + * @returns Call instruction + */ + LIR* CallWithLinkerFixup(const MethodReference& target_method, InvokeType type); + + /* + * @brief Generate the actual call insn based on the method info. + * @param method_info the lowering info for the method call. + * @returns Call instruction + */ + LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE; + + /* + * @brief Handle ARM specific literals. + */ + void InstallLiteralPools() OVERRIDE; + LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE; size_t GetInstructionOffset(LIR* lir); @@ -215,6 +238,8 @@ class ArmMir2Lir FINAL : public Mir2Lir { static constexpr ResourceMask GetRegMaskArm(RegStorage reg); static constexpr ResourceMask EncodeArmRegList(int reg_list); static constexpr ResourceMask EncodeArmRegFpcsList(int reg_list); + + ArenaVector<LIR*> call_method_insns_; }; } // namespace art diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 0de2a445d0..1a4b23e271 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -377,7 +377,7 @@ LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_va * TODO: consider interspersing slowpaths in code following unconditional branches. */ bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget)); - skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64); + skip &= ((mir_graph_->GetNumDalvikInsns() - current_dalvik_offset_) > 64); if (!skip && reg.Low8() && (check_value == 0)) { if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) { branch = NewLIR2((arm_cond == kArmCondEq) ? 
kThumb2Cbz : kThumb2Cbnz, diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 0be478d11e..dd8f7fe3d8 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -452,6 +452,11 @@ std::string ArmMir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1), lir->target); break; + case 'T': + snprintf(tbuf, arraysize(tbuf), "%s", PrettyMethod( + static_cast<uint32_t>(lir->operands[1]), + *reinterpret_cast<const DexFile*>(UnwrapPointer(lir->operands[2]))).c_str()); + break; case 'u': { int offset_1 = lir->operands[0]; int offset_2 = NEXT_LIR(lir)->operands[0]; @@ -551,7 +556,9 @@ RegisterClass ArmMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatil } ArmMir2Lir::ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) - : Mir2Lir(cu, mir_graph, arena) { + : Mir2Lir(cu, mir_graph, arena), + call_method_insns_(arena->Adapter()) { + call_method_insns_.reserve(100); // Sanity check - make sure encoding map lines up. for (int i = 0; i < kArmLast; i++) { if (ArmMir2Lir::EncodingMap[i].opcode != i) { @@ -568,16 +575,16 @@ Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, } void ArmMir2Lir::CompilerInitializeRegAlloc() { - reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, empty_pool /* core64 */, sp_regs, - dp_regs, reserved_regs, empty_pool /* reserved64 */, - core_temps, empty_pool /* core64_temps */, sp_temps, - dp_temps); + reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs, empty_pool /* core64 */, + sp_regs, dp_regs, + reserved_regs, empty_pool /* reserved64 */, + core_temps, empty_pool /* core64_temps */, + sp_temps, dp_temps)); // Target-specific adjustments. // Alias single precision floats to appropriate half of overlapping double. - GrowableArray<RegisterInfo*>::Iterator it(®_pool_->sp_regs_); - for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { + for (RegisterInfo* info : reg_pool_->sp_regs_) { int sp_reg_num = info->GetReg().GetRegNum(); int dp_reg_num = sp_reg_num >> 1; RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num); @@ -784,8 +791,7 @@ RegStorage ArmMir2Lir::AllocPreservedDouble(int s_reg) { * TODO: until runtime support is in, make sure we avoid promoting the same vreg to * different underlying physical registers. */ - GrowableArray<RegisterInfo*>::Iterator it(®_pool_->dp_regs_); - for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { + for (RegisterInfo* info : reg_pool_->dp_regs_) { if (!info->IsTemp() && !info->InUse()) { res = info->GetReg(); info->MarkInUse(); @@ -809,8 +815,7 @@ RegStorage ArmMir2Lir::AllocPreservedDouble(int s_reg) { // Reserve a callee-save sp single register. RegStorage ArmMir2Lir::AllocPreservedSingle(int s_reg) { RegStorage res; - GrowableArray<RegisterInfo*>::Iterator it(®_pool_->sp_regs_); - for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { + for (RegisterInfo* info : reg_pool_->sp_regs_) { if (!info->IsTemp() && !info->InUse()) { res = info->GetReg(); int p_map_idx = SRegToPMap(s_reg); @@ -825,4 +830,21 @@ RegStorage ArmMir2Lir::AllocPreservedSingle(int s_reg) { return res; } +void ArmMir2Lir::InstallLiteralPools() { + // PC-relative calls to methods. 
+ patches_.reserve(call_method_insns_.size()); + for (LIR* p : call_method_insns_) { + DCHECK_EQ(p->opcode, kThumb2Bl); + uint32_t target_method_idx = p->operands[1]; + const DexFile* target_dex_file = + reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2])); + + patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset, + target_dex_file, target_method_idx)); + } + + // And do the normal processing. + Mir2Lir::InstallLiteralPools(); +} + } // namespace art diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index d001dd652a..ab7192143e 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -22,77 +22,75 @@ namespace art { /* - * TODO(Arm64): the comments below are outdated. - * * Runtime register usage conventions. * - * r0-r3: Argument registers in both Dalvik and C/C++ conventions. - * However, for Dalvik->Dalvik calls we'll pass the target's Method* - * pointer in r0 as a hidden arg0. Otherwise used as codegen scratch - * registers. - * r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit - * r4 : (rA64_SUSPEND) is reserved (suspend check/debugger assist) - * r5 : Callee save (promotion target) - * r6 : Callee save (promotion target) - * r7 : Callee save (promotion target) - * r8 : Callee save (promotion target) - * r9 : (rA64_SELF) is reserved (pointer to thread-local storage) - * r10 : Callee save (promotion target) - * r11 : Callee save (promotion target) - * r12 : Scratch, may be trashed by linkage stubs - * r13 : (sp) is reserved - * r14 : (lr) is reserved - * r15 : (pc) is reserved + * r0 : As in C/C++ w0 is 32-bit return register and x0 is 64-bit. + * r0-r7 : Argument registers in both Dalvik and C/C++ conventions. + * However, for Dalvik->Dalvik calls we'll pass the target's Method* + * pointer in x0 as a hidden arg0. Otherwise used as codegen scratch + * registers. + * r8-r15 : Caller save registers (used as temporary registers). + * r16-r17: Also known as ip0-ip1, respectively. Used as scratch registers by + * the linker, by the trampolines and other stubs (the backend uses + * these as temporary registers). + * r18 : (rxSELF) is reserved (pointer to thread-local storage). + * r19 : (rwSUSPEND) is reserved (suspend check/debugger assist). + * r20-r29: Callee save registers (promotion targets). + * r30 : (lr) is reserved (the link register). + * rsp : (sp) is reserved (the stack pointer). + * rzr : (zr) is reserved (the zero register). + * + * 18 core temps that codegen can use (r0-r17). + * 10 core registers that can be used for promotion. * - * 5 core temps that codegen can use (r0, r1, r2, r3, r12) - * 7 core registers that can be used for promotion + * Floating-point registers + * v0-v31 * - * Floating pointer registers - * s0-s31 - * d0-d15, where d0={s0,s1}, d1={s2,s3}, ... , d15={s30,s31} + * v0 : s0 is return register for singles (32-bit) and d0 for doubles (64-bit). + * This is analogous to the C/C++ (hard-float) calling convention. + * v0-v7 : Floating-point argument registers in both Dalvik and C/C++ conventions. + * Also used as temporary and codegen scratch registers. * - * s16-s31 (d8-d15) preserved across C calls - * s0-s15 (d0-d7) trashed across C calls + * v0-v7 and v16-v31 : trashed across C calls. + * v8-v15 : bottom 64-bits preserved across C calls (d8-d15 are preserved). * - * s0-s15/d0-d7 used as codegen temp/scratch - * s16-s31/d8-d31 can be used for promotion. + * v16-v31: Used as codegen temp/scratch. + * v8-v15 : Can be used for promotion. 
* - * Calling convention - * o On a call to a Dalvik method, pass target's Method* in r0 - * o r1-r3 will be used for up to the first 3 words of arguments - * o Arguments past the first 3 words will be placed in appropriate + * Calling convention (Hard-float) + * o On a call to a Dalvik method, pass target's Method* in x0 + * o r1-r7, v0-v7 will be used for the first 7+8 arguments + * o Arguments which cannot be put in registers are placed in appropriate * out slots by the caller. - * o If a 64-bit argument would span the register/memory argument - * boundary, it will instead be fully passed in the frame. * o Maintain a 16-byte stack alignment * * Stack frame diagram (stack grows down, higher addresses at top): * - * +------------------------+ - * | IN[ins-1] | {Note: resides in caller's frame} - * | . | - * | IN[0] | - * | caller's Method* | - * +========================+ {Note: start of callee's frame} - * | spill region | {variable sized - will include lr if non-leaf.} - * +------------------------+ - * | ...filler word... | {Note: used as 2nd word of V[locals-1] if long] - * +------------------------+ - * | V[locals-1] | - * | V[locals-2] | - * | . | - * | . | - * | V[1] | - * | V[0] | - * +------------------------+ - * | 0 to 3 words padding | - * +------------------------+ - * | OUT[outs-1] | - * | OUT[outs-2] | - * | . | - * | OUT[0] | - * | cur_method* | <<== sp w/ 16-byte alignment - * +========================+ + * +--------------------------------------------+ + * | IN[ins-1] | {Note: resides in caller's frame} + * | . | + * | IN[0] | + * | caller's method (StackReference<ArtMethod>)| {This is a compressed (4-bytes) reference} + * +============================================+ {Note: start of callee's frame} + * | spill region | {variable sized - will include lr if non-leaf} + * +--------------------------------------------+ + * | ...filler word... | {Note: used as 2nd word of V[locals-1] if long} + * +--------------------------------------------+ + * | V[locals-1] | + * | V[locals-2] | + * | . | + * | . | + * | V[1] | + * | V[0] | + * +--------------------------------------------+ + * | 0 to 3 words padding | + * +--------------------------------------------+ + * | OUT[outs-1] | + * | OUT[outs-2] | + * | . | + * | OUT[0] | + * | current method (StackReference<ArtMethod>) | <<== sp w/ 16-byte alignment + * +============================================+ */ // First FP callee save. @@ -103,12 +101,12 @@ namespace art { #define A64_REG_IS_ZR(reg_num) ((reg_num) == rwzr || (reg_num) == rxzr) #define A64_REGSTORAGE_IS_SP_OR_ZR(rs) (((rs).GetRegNum() & 0x1f) == 0x1f) -enum Arm64ResourceEncodingPos { - kArm64GPReg0 = 0, - kArm64RegLR = 30, - kArm64RegSP = 31, - kArm64FPReg0 = 32, - kArm64RegEnd = 64, +enum A64ResourceEncodingPos { + kA64GPReg0 = 0, + kA64RegLR = 30, + kA64RegSP = 31, + kA64FPReg0 = 32, + kA64RegEnd = 64, }; #define IS_SIGNED_IMM(size, value) \ @@ -186,15 +184,15 @@ constexpr RegStorage rs_wsp(RegStorage::kValid | rwsp); constexpr RegStorage rs_wLR(RegStorage::kValid | rwLR); // RegisterLocation templates return values (following the hard-float calling convention). 
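The rewritten comment above describes the AArch64 hard-float convention: x0 carries the callee's Method*, the next core arguments go to x1..x7, floating-point arguments go to v0..v7, and anything that does not fit is placed in an out slot by the caller. A much-simplified sketch of that assignment, ignoring wide values, references, and the exact out-slot layout (illustrative only):

#include <iostream>
#include <string>
#include <vector>

int main() {
  struct Arg { bool is_fp; };
  std::vector<Arg> args = {{false}, {false}, {true}, {false}, {true}};  // Dalvik args after Method*.
  int next_core = 1;  // x0 is taken by the callee Method*.
  int next_fp = 0;
  for (size_t i = 0; i < args.size(); ++i) {
    std::string loc;
    if (args[i].is_fp) {
      loc = (next_fp <= 7) ? "v" + std::to_string(next_fp++) : "stack out slot";
    } else {
      loc = (next_core <= 7) ? "x" + std::to_string(next_core++) : "stack out slot";
    }
    std::cout << "arg" << i << " -> " << loc << "\n";
  }
  return 0;
}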
-const RegLocation arm_loc_c_return = +const RegLocation a64_loc_c_return = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_w0, INVALID_SREG, INVALID_SREG}; -const RegLocation arm_loc_c_return_ref = +const RegLocation a64_loc_c_return_ref = {kLocPhysReg, 0, 0, 0, 0, 0, 1, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG}; -const RegLocation arm_loc_c_return_wide = +const RegLocation a64_loc_c_return_wide = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG}; -const RegLocation arm_loc_c_return_float = +const RegLocation a64_loc_c_return_float = {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, rs_f0, INVALID_SREG, INVALID_SREG}; -const RegLocation arm_loc_c_return_double = +const RegLocation a64_loc_c_return_double = {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, rs_d0, INVALID_SREG, INVALID_SREG}; /** @@ -228,7 +226,7 @@ enum A64RegExtEncodings { * assembler. Their corresponding EncodingMap positions will be defined in * assemble_arm64.cc. */ -enum ArmOpcode { +enum A64Opcode { kA64First = 0, kA64Adc3rrr = kA64First, // adc [00011010000] rm[20-16] [000000] rn[9-5] rd[4-0]. kA64Add4RRdT, // add [s001000100] imm_12[21-10] rn[9-5] rd[4-0]. @@ -375,22 +373,13 @@ enum ArmOpcode { */ // Return the wide and no-wide variants of the given opcode. -#define WIDE(op) ((ArmOpcode)((op) | kA64Wide)) -#define UNWIDE(op) ((ArmOpcode)((op) & ~kA64Wide)) +#define WIDE(op) ((A64Opcode)((op) | kA64Wide)) +#define UNWIDE(op) ((A64Opcode)((op) & ~kA64Wide)) // Whether the given opcode is wide. #define IS_WIDE(op) (((op) & kA64Wide) != 0) -/* - * Floating point variants. These are just aliases of the macros above which we use for floating - * point instructions, just for readibility reasons. - * TODO(Arm64): should we remove these and use the original macros? - */ -#define FWIDE WIDE -#define FUNWIDE UNWIDE -#define IS_FWIDE IS_WIDE - -enum ArmOpDmbOptions { +enum A64OpDmbOptions { kSY = 0xf, kST = 0xe, kISH = 0xb, @@ -401,7 +390,7 @@ enum ArmOpDmbOptions { }; // Instruction assembly field_loc kind. -enum ArmEncodingKind { +enum A64EncodingKind { // All the formats below are encoded in the same way (as a kFmtBitBlt). // These are grouped together, for fast handling (e.g. "if (LIKELY(fmt <= kFmtBitBlt)) ..."). kFmtRegW = 0, // Word register (w) or wzr. @@ -425,15 +414,15 @@ enum ArmEncodingKind { }; // Struct used to define the snippet positions for each A64 opcode. -struct ArmEncodingMap { +struct A64EncodingMap { uint32_t wskeleton; uint32_t xskeleton; struct { - ArmEncodingKind kind; + A64EncodingKind kind; int end; // end for kFmtBitBlt, 1-bit slice end for FP regs. int start; // start for kFmtBitBlt, 4-bit slice end for FP regs. } field_loc[4]; - ArmOpcode opcode; // can be WIDE()-ned to indicate it has a wide variant. + A64Opcode opcode; // can be WIDE()-ned to indicate it has a wide variant. uint64_t flags; const char* name; const char* fmt; @@ -441,25 +430,6 @@ struct ArmEncodingMap { FixupKind fixup; }; -#if 0 -// TODO(Arm64): try the following alternative, which fits exactly in one cache line (64 bytes). -struct ArmEncodingMap { - uint32_t wskeleton; - uint32_t xskeleton; - uint64_t flags; - const char* name; - const char* fmt; - struct { - uint8_t kind; - int8_t end; // end for kFmtBitBlt, 1-bit slice end for FP regs. - int8_t start; // start for kFmtBitBlt, 4-bit slice end for FP regs. - } field_loc[4]; - uint32_t fixup; - uint32_t opcode; // can be WIDE()-ned to indicate it has a wide variant. 
- uint32_t padding[3]; -}; -#endif - } // namespace art #endif // ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_ diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index 5115246fc8..b1cf279683 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -47,7 +47,7 @@ namespace art { CUSTOM_VARIANTS(type00_skeleton, (type00_skeleton | 0x00400000)) /* - * opcode: ArmOpcode enum + * opcode: A64Opcode enum * variants: instruction skeletons supplied via CUSTOM_VARIANTS or derived macros. * a{n}k: key to applying argument {n} \ * a{n}s: argument {n} start bit position | n = 0, 1, 2, 3 @@ -102,8 +102,8 @@ namespace art { * * [!] escape. To insert "!", use "!!" */ -/* NOTE: must be kept in sync with enum ArmOpcode from arm64_lir.h */ -const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { +/* NOTE: must be kept in sync with enum A64Opcode from arm64_lir.h */ +const A64EncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { ENCODING_MAP(WIDE(kA64Adc3rrr), SF_VARIANTS(0x1a000000), kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, @@ -229,27 +229,27 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE12, "extr", "!0r, !1r, !2r, #!3d", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fabs2ff), FLOAT_VARIANTS(0x1e20c000), + ENCODING_MAP(WIDE(kA64Fabs2ff), FLOAT_VARIANTS(0x1e20c000), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP| REG_DEF0_USE1, "fabs", "!0f, !1f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fadd3fff), FLOAT_VARIANTS(0x1e202800), + ENCODING_MAP(WIDE(kA64Fadd3fff), FLOAT_VARIANTS(0x1e202800), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, "fadd", "!0f, !1f, !2f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fcmp1f), FLOAT_VARIANTS(0x1e202008), + ENCODING_MAP(WIDE(kA64Fcmp1f), FLOAT_VARIANTS(0x1e202008), kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | SETS_CCODES, "fcmp", "!0f, #0", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fcmp2ff), FLOAT_VARIANTS(0x1e202000), + ENCODING_MAP(WIDE(kA64Fcmp2ff), FLOAT_VARIANTS(0x1e202000), kFmtRegF, 9, 5, kFmtRegF, 20, 16, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, "fcmp", "!0f, !1f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fcvtzs2wf), FLOAT_VARIANTS(0x1e380000), + ENCODING_MAP(WIDE(kA64Fcvtzs2wf), FLOAT_VARIANTS(0x1e380000), kFmtRegW, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "fcvtzs", "!0w, !1f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fcvtzs2xf), FLOAT_VARIANTS(0x9e380000), + ENCODING_MAP(WIDE(kA64Fcvtzs2xf), FLOAT_VARIANTS(0x9e380000), kFmtRegX, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "fcvtzs", "!0x, !1f", kFixupNone), @@ -269,23 +269,23 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegX, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "fcvtms", "!0x, !1S", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fdiv3fff), FLOAT_VARIANTS(0x1e201800), + ENCODING_MAP(WIDE(kA64Fdiv3fff), FLOAT_VARIANTS(0x1e201800), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, "fdiv", "!0f, !1f, !2f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fmax3fff), 
FLOAT_VARIANTS(0x1e204800), + ENCODING_MAP(WIDE(kA64Fmax3fff), FLOAT_VARIANTS(0x1e204800), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, "fmax", "!0f, !1f, !2f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fmin3fff), FLOAT_VARIANTS(0x1e205800), + ENCODING_MAP(WIDE(kA64Fmin3fff), FLOAT_VARIANTS(0x1e205800), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, "fmin", "!0f, !1f, !2f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fmov2ff), FLOAT_VARIANTS(0x1e204000), + ENCODING_MAP(WIDE(kA64Fmov2ff), FLOAT_VARIANTS(0x1e204000), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE, "fmov", "!0f, !1f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fmov2fI), FLOAT_VARIANTS(0x1e201000), + ENCODING_MAP(WIDE(kA64Fmov2fI), FLOAT_VARIANTS(0x1e201000), kFmtRegF, 4, 0, kFmtBitBlt, 20, 13, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, "fmov", "!0f, #!1I", kFixupNone), @@ -305,35 +305,35 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegX, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "fmov", "!0x, !1S", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fmul3fff), FLOAT_VARIANTS(0x1e200800), + ENCODING_MAP(WIDE(kA64Fmul3fff), FLOAT_VARIANTS(0x1e200800), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, "fmul", "!0f, !1f, !2f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fneg2ff), FLOAT_VARIANTS(0x1e214000), + ENCODING_MAP(WIDE(kA64Fneg2ff), FLOAT_VARIANTS(0x1e214000), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "fneg", "!0f, !1f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Frintp2ff), FLOAT_VARIANTS(0x1e24c000), + ENCODING_MAP(WIDE(kA64Frintp2ff), FLOAT_VARIANTS(0x1e24c000), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "frintp", "!0f, !1f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Frintm2ff), FLOAT_VARIANTS(0x1e254000), + ENCODING_MAP(WIDE(kA64Frintm2ff), FLOAT_VARIANTS(0x1e254000), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "frintm", "!0f, !1f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Frintn2ff), FLOAT_VARIANTS(0x1e244000), + ENCODING_MAP(WIDE(kA64Frintn2ff), FLOAT_VARIANTS(0x1e244000), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "frintn", "!0f, !1f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Frintz2ff), FLOAT_VARIANTS(0x1e25c000), + ENCODING_MAP(WIDE(kA64Frintz2ff), FLOAT_VARIANTS(0x1e25c000), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "frintz", "!0f, !1f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fsqrt2ff), FLOAT_VARIANTS(0x1e61c000), + ENCODING_MAP(WIDE(kA64Fsqrt2ff), FLOAT_VARIANTS(0x1e61c000), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "fsqrt", "!0f, !1f", kFixupNone), - ENCODING_MAP(FWIDE(kA64Fsub3fff), FLOAT_VARIANTS(0x1e203800), + ENCODING_MAP(WIDE(kA64Fsub3fff), FLOAT_VARIANTS(0x1e203800), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, "fsub", "!0f, !1f, !2f", kFixupNone), @@ -369,7 +369,7 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16, kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 
| IS_LOAD_OFF, "ldrsh", "!0r, [!1X, !2x, lsl #!3d]", kFixupNone), - ENCODING_MAP(FWIDE(kA64Ldr2fp), SIZE_VARIANTS(0x1c000000), + ENCODING_MAP(WIDE(kA64Ldr2fp), SIZE_VARIANTS(0x1c000000), kFmtRegF, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP, @@ -379,7 +379,7 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP, "ldr", "!0r, !1p", kFixupLoad), - ENCODING_MAP(FWIDE(kA64Ldr3fXD), SIZE_VARIANTS(0xbd400000), + ENCODING_MAP(WIDE(kA64Ldr3fXD), SIZE_VARIANTS(0xbd400000), kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF, "ldr", "!0f, [!1X, #!2D]", kFixupNone), @@ -387,7 +387,7 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF, "ldr", "!0r, [!1X, #!2D]", kFixupNone), - ENCODING_MAP(FWIDE(kA64Ldr4fXxG), SIZE_VARIANTS(0xbc606800), + ENCODING_MAP(WIDE(kA64Ldr4fXxG), SIZE_VARIANTS(0xbc606800), kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16, kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, "ldr", "!0f, [!1X, !2x!3G]", kFixupNone), @@ -411,7 +411,7 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF012 | IS_LOAD, "ldp", "!0r, !1r, [!2X], #!3D", kFixupNone), - ENCODING_MAP(FWIDE(kA64Ldur3fXd), CUSTOM_VARIANTS(0xbc400000, 0xfc400000), + ENCODING_MAP(WIDE(kA64Ldur3fXd), CUSTOM_VARIANTS(0xbc400000, 0xfc400000), kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, "ldur", "!0f, [!1X, #!2d]", kFixupNone), @@ -507,11 +507,11 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16, kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1, "sbfm", "!0r, !1r, #!2d, #!3d", kFixupNone), - ENCODING_MAP(FWIDE(kA64Scvtf2fw), FLOAT_VARIANTS(0x1e220000), + ENCODING_MAP(WIDE(kA64Scvtf2fw), FLOAT_VARIANTS(0x1e220000), kFmtRegF, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "scvtf", "!0f, !1w", kFixupNone), - ENCODING_MAP(FWIDE(kA64Scvtf2fx), FLOAT_VARIANTS(0x9e220000), + ENCODING_MAP(WIDE(kA64Scvtf2fx), FLOAT_VARIANTS(0x9e220000), kFmtRegF, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "scvtf", "!0f, !1x", kFixupNone), @@ -547,11 +547,11 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE, "stp", "!0r, !1r, [!2X, #!3D]!!", kFixupNone), - ENCODING_MAP(FWIDE(kA64Str3fXD), CUSTOM_VARIANTS(0xbd000000, 0xfd000000), + ENCODING_MAP(WIDE(kA64Str3fXD), CUSTOM_VARIANTS(0xbd000000, 0xfd000000), kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF, "str", "!0f, [!1X, #!2D]", kFixupNone), - ENCODING_MAP(FWIDE(kA64Str4fXxG), CUSTOM_VARIANTS(0xbc206800, 0xfc206800), + ENCODING_MAP(WIDE(kA64Str4fXxG), CUSTOM_VARIANTS(0xbc206800, 0xfc206800), kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16, kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE, "str", "!0f, [!1X, !2x!3G]", kFixupNone), @@ -583,7 +583,7 @@ const ArmEncodingMap 
Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | REG_DEF1 | IS_STORE, "str", "!0r, [!1X], #!2d", kFixupNone), - ENCODING_MAP(FWIDE(kA64Stur3fXd), CUSTOM_VARIANTS(0xbc000000, 0xfc000000), + ENCODING_MAP(WIDE(kA64Stur3fXd), CUSTOM_VARIANTS(0xbc000000, 0xfc000000), kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, "stur", "!0f, [!1X, #!2d]", kFixupNone), @@ -667,21 +667,21 @@ void Arm64Mir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { for (; lir != nullptr; lir = NEXT_LIR(lir)) { bool opcode_is_wide = IS_WIDE(lir->opcode); - ArmOpcode opcode = UNWIDE(lir->opcode); + A64Opcode opcode = UNWIDE(lir->opcode); if (UNLIKELY(IsPseudoLirOp(opcode))) { continue; } if (LIKELY(!lir->flags.is_nop)) { - const ArmEncodingMap *encoder = &EncodingMap[opcode]; + const A64EncodingMap *encoder = &EncodingMap[opcode]; // Select the right variant of the skeleton. uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton; DCHECK(!opcode_is_wide || IS_WIDE(encoder->opcode)); for (int i = 0; i < 4; i++) { - ArmEncodingKind kind = encoder->field_loc[i].kind; + A64EncodingKind kind = encoder->field_loc[i].kind; uint32_t operand = lir->operands[i]; uint32_t value; @@ -1027,7 +1027,7 @@ void Arm64Mir2Lir::AssembleLIR() { } size_t Arm64Mir2Lir::GetInsnSize(LIR* lir) { - ArmOpcode opcode = UNWIDE(lir->opcode); + A64Opcode opcode = UNWIDE(lir->opcode); DCHECK(!IsPseudoLirOp(opcode)); return EncodingMap[opcode].size; } @@ -1038,7 +1038,7 @@ uint32_t Arm64Mir2Lir::LinkFixupInsns(LIR* head_lir, LIR* tail_lir, uint32_t off LIR* last_fixup = NULL; for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) { - ArmOpcode opcode = UNWIDE(lir->opcode); + A64Opcode opcode = UNWIDE(lir->opcode); if (!lir->flags.is_nop) { if (lir->flags.fixup != kFixupNone) { if (!IsPseudoLirOp(opcode)) { diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index b9c0990473..6081f289e2 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -55,7 +55,7 @@ void Arm64Mir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLoca tab_rec->vaddr = current_dalvik_offset_; uint32_t size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); - switch_tables_.Insert(tab_rec); + switch_tables_.push_back(tab_rec); // Get the switch value rl_src = LoadValue(rl_src, kCoreReg); @@ -108,7 +108,7 @@ void Arm64Mir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLoca uint32_t size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); - switch_tables_.Insert(tab_rec); + switch_tables_.push_back(tab_rec); // Get the switch value rl_src = LoadValue(rl_src, kCoreReg); @@ -167,7 +167,7 @@ void Arm64Mir2Lir::GenFillArrayData(MIR* mir, DexOffset table_offset, RegLocatio uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16); tab_rec->size = (size * width) + 8; - fill_array_data_.Insert(tab_rec); + fill_array_data_.push_back(tab_rec); // Making a call - use explicit registers FlushAllRegs(); /* Everything to home location */ diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 1c402928c3..55cc938422 100644 --- 
a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -395,7 +395,7 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { RegLocation rl_src2, bool is_div); InToRegStorageMapping in_to_reg_storage_mapping_; - static const ArmEncodingMap EncodingMap[kA64Last]; + static const A64EncodingMap EncodingMap[kA64Last]; }; } // namespace art diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc index a39d151c48..db24d124ab 100644 --- a/compiler/dex/quick/arm64/fp_arm64.cc +++ b/compiler/dex/quick/arm64/fp_arm64.cc @@ -112,7 +112,7 @@ void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode, rl_result = EvalLoc(rl_dest, kFPReg, true); DCHECK(rl_dest.wide); DCHECK(rl_result.wide); - NewLIR3(FWIDE(op), rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); + NewLIR3(WIDE(op), rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); StoreValueWide(rl_dest, rl_result); } @@ -145,17 +145,17 @@ void Arm64Mir2Lir::GenConversion(Instruction::Code opcode, dst_reg_class = kFPReg; break; case Instruction::INT_TO_DOUBLE: - op = FWIDE(kA64Scvtf2fw); + op = WIDE(kA64Scvtf2fw); src_reg_class = kCoreReg; dst_reg_class = kFPReg; break; case Instruction::DOUBLE_TO_INT: - op = FWIDE(kA64Fcvtzs2wf); + op = WIDE(kA64Fcvtzs2wf); src_reg_class = kFPReg; dst_reg_class = kCoreReg; break; case Instruction::LONG_TO_DOUBLE: - op = FWIDE(kA64Scvtf2fx); + op = WIDE(kA64Scvtf2fx); src_reg_class = kCoreReg; dst_reg_class = kFPReg; break; @@ -170,7 +170,7 @@ void Arm64Mir2Lir::GenConversion(Instruction::Code opcode, dst_reg_class = kFPReg; break; case Instruction::DOUBLE_TO_LONG: - op = FWIDE(kA64Fcvtzs2xf); + op = WIDE(kA64Fcvtzs2xf); src_reg_class = kFPReg; dst_reg_class = kCoreReg; break; @@ -208,7 +208,7 @@ void Arm64Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, rl_src2 = mir_graph_->GetSrcWide(mir, 2); rl_src1 = LoadValueWide(rl_src1, kFPReg); rl_src2 = LoadValueWide(rl_src2, kFPReg); - NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); + NewLIR2(WIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); } else { rl_src1 = mir_graph_->GetSrc(mir, 0); rl_src2 = mir_graph_->GetSrc(mir, 1); @@ -281,7 +281,7 @@ void Arm64Mir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, ClobberSReg(rl_dest.s_reg_low); rl_result = EvalLoc(rl_dest, kCoreReg, true); LoadConstant(rl_result.reg, default_result); - NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); + NewLIR2(WIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); } else { rl_src1 = LoadValue(rl_src1, kFPReg); rl_src2 = LoadValue(rl_src2, kFPReg); @@ -318,7 +318,7 @@ void Arm64Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) { RegLocation rl_result; rl_src = LoadValueWide(rl_src, kFPReg); rl_result = EvalLoc(rl_dest, kFPReg, true); - NewLIR2(FWIDE(kA64Fneg2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); + NewLIR2(WIDE(kA64Fneg2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); StoreValueWide(rl_dest, rl_result); } @@ -370,7 +370,7 @@ bool Arm64Mir2Lir::GenInlinedAbsDouble(CallInfo* info) { rl_src = LoadValueWide(rl_src, reg_class); RegLocation rl_result = EvalLoc(rl_dest, reg_class, true); if (reg_class == kFPReg) { - NewLIR2(FWIDE(kA64Fabs2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); + NewLIR2(WIDE(kA64Fabs2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); } else { // Clear the sign bit in an integer register. 
OpRegRegImm64(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffffffffffff); @@ -384,7 +384,7 @@ bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) { RegLocation rl_dest = InlineTargetWide(info); // double place for result rl_src = LoadValueWide(rl_src, kFPReg); RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); - NewLIR2(FWIDE(kA64Fsqrt2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); + NewLIR2(WIDE(kA64Fsqrt2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); StoreValueWide(rl_dest, rl_result); return true; } @@ -394,7 +394,7 @@ bool Arm64Mir2Lir::GenInlinedCeil(CallInfo* info) { RegLocation rl_dest = InlineTargetWide(info); rl_src = LoadValueWide(rl_src, kFPReg); RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); - NewLIR2(FWIDE(kA64Frintp2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); + NewLIR2(WIDE(kA64Frintp2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); StoreValueWide(rl_dest, rl_result); return true; } @@ -404,7 +404,7 @@ bool Arm64Mir2Lir::GenInlinedFloor(CallInfo* info) { RegLocation rl_dest = InlineTargetWide(info); rl_src = LoadValueWide(rl_src, kFPReg); RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); - NewLIR2(FWIDE(kA64Frintm2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); + NewLIR2(WIDE(kA64Frintm2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); StoreValueWide(rl_dest, rl_result); return true; } @@ -414,14 +414,14 @@ bool Arm64Mir2Lir::GenInlinedRint(CallInfo* info) { RegLocation rl_dest = InlineTargetWide(info); rl_src = LoadValueWide(rl_src, kFPReg); RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); - NewLIR2(FWIDE(kA64Frintn2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); + NewLIR2(WIDE(kA64Frintn2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); StoreValueWide(rl_dest, rl_result); return true; } bool Arm64Mir2Lir::GenInlinedRound(CallInfo* info, bool is_double) { int32_t encoded_imm = EncodeImmSingle(bit_cast<float, uint32_t>(0.5f)); - ArmOpcode wide = (is_double) ? FWIDE(0) : FUNWIDE(0); + A64Opcode wide = (is_double) ? WIDE(0) : UNWIDE(0); RegLocation rl_src = info->args[0]; RegLocation rl_dest = (is_double) ? InlineTargetWide(info) : InlineTarget(info); rl_src = (is_double) ? LoadValueWide(rl_src, kFPReg) : LoadValue(rl_src, kFPReg); @@ -439,7 +439,7 @@ bool Arm64Mir2Lir::GenInlinedRound(CallInfo* info, bool is_double) { bool Arm64Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) { DCHECK_EQ(cu_->instruction_set, kArm64); int op = (is_min) ? kA64Fmin3fff : kA64Fmax3fff; - ArmOpcode wide = (is_double) ? FWIDE(0) : FUNWIDE(0); + A64Opcode wide = (is_double) ? WIDE(0) : UNWIDE(0); RegLocation rl_src1 = info->args[0]; RegLocation rl_src2 = (is_double) ? info->args[2] : info->args[1]; rl_src1 = (is_double) ? LoadValueWide(rl_src1, kFPReg) : LoadValue(rl_src1, kFPReg); diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 094db4cac3..88123e1699 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -262,18 +262,18 @@ LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_ ArmConditionCode arm_cond = ArmConditionEncoding(cond); if (check_value == 0) { if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) { - ArmOpcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt; - ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0); + A64Opcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt; + A64Opcode wide = reg.Is64Bit() ? 
WIDE(0) : UNWIDE(0); branch = NewLIR2(opcode | wide, reg.GetReg(), 0); } else if (arm_cond == kArmCondLs) { // kArmCondLs is an unsigned less or equal. A comparison r <= 0 is then the same as cbz. // This case happens for a bounds check of array[0]. - ArmOpcode opcode = kA64Cbz2rt; - ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0); + A64Opcode opcode = kA64Cbz2rt; + A64Opcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0); branch = NewLIR2(opcode | wide, reg.GetReg(), 0); } else if (arm_cond == kArmCondLt || arm_cond == kArmCondGe) { - ArmOpcode opcode = (arm_cond == kArmCondLt) ? kA64Tbnz3rht : kA64Tbz3rht; - ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0); + A64Opcode opcode = (arm_cond == kArmCondLt) ? kA64Tbnz3rht : kA64Tbz3rht; + A64Opcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0); int value = reg.Is64Bit() ? 63 : 31; branch = NewLIR3(opcode | wide, reg.GetReg(), value, 0); } @@ -305,7 +305,7 @@ LIR* Arm64Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) { bool dest_is_fp = r_dest.IsFloat(); bool src_is_fp = r_src.IsFloat(); - ArmOpcode opcode = kA64Brk1d; + A64Opcode opcode = kA64Brk1d; LIR* res; if (LIKELY(dest_is_fp == src_is_fp)) { @@ -333,7 +333,7 @@ LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) { DCHECK_EQ(dest_is_double, src_is_double); // Homogeneous float/float copy. - opcode = (dest_is_double) ? FWIDE(kA64Fmov2ff) : kA64Fmov2ff; + opcode = (dest_is_double) ? WIDE(kA64Fmov2ff) : kA64Fmov2ff; } } else { // Inhomogeneous register copy. @@ -630,7 +630,7 @@ RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegS // temp = r_src1 / r_src2 // dest = r_src1 - temp * r_src2 RegStorage temp; - ArmOpcode wide; + A64Opcode wide; if (rl_result.reg.Is64Bit()) { temp = AllocTempWide(); wide = WIDE(0); @@ -770,7 +770,7 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { RegStorage r_tmp; RegStorage r_tmp_stored; RegStorage rl_new_value_stored = rl_new_value.reg; - ArmOpcode wide = UNWIDE(0); + A64Opcode wide = UNWIDE(0); if (is_long) { r_tmp_stored = r_tmp = AllocTempWide(); wide = WIDE(0); @@ -962,7 +962,7 @@ LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* tar // Combine sub & test using sub setflags encoding here. We need to make sure a // subtract form that sets carry is used, so generate explicitly. // TODO: might be best to add a new op, kOpSubs, and handle it generically. - ArmOpcode opcode = reg.Is64Bit() ? WIDE(kA64Subs3rRd) : UNWIDE(kA64Subs3rRd); + A64Opcode opcode = reg.Is64Bit() ? WIDE(kA64Subs3rRd) : UNWIDE(kA64Subs3rRd); NewLIR3(opcode, reg.GetReg(), reg.GetReg(), 1); // For value == 1, this should set flags. DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); return OpCondBranch(c_code, target); @@ -1459,7 +1459,7 @@ static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) { reg_mask = GenPairWise(reg_mask, & reg1, & reg2); if (UNLIKELY(reg2 < 0)) { - m2l->NewLIR3(FWIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), + m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); } else { m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), @@ -1570,7 +1570,7 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core // Have some FP regs to do. 
fp_reg_mask = GenPairWise(fp_reg_mask, ®1, ®2); if (UNLIKELY(reg2 < 0)) { - m2l->NewLIR3(FWIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), + m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset); // Do not increment offset here, as the second half will be filled by a core reg. } else { @@ -1643,7 +1643,7 @@ static void UnSpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32 for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) { reg_mask = GenPairWise(reg_mask, & reg1, & reg2); if (UNLIKELY(reg2 < 0)) { - m2l->NewLIR3(FWIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), + m2l->NewLIR3(WIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); } else { m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), @@ -1705,7 +1705,7 @@ void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t } bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) { - ArmOpcode wide = IsWide(size) ? WIDE(0) : UNWIDE(0); + A64Opcode wide = IsWide(size) ? WIDE(0) : UNWIDE(0); RegLocation rl_src_i = info->args[0]; RegLocation rl_dest = IsWide(size) ? InlineTargetWide(info) : InlineTarget(info); // result reg RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index d7d5651b41..0462530a32 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -84,23 +84,23 @@ static constexpr ArrayRef<const RegStorage> sp_temps(sp_temps_arr); static constexpr ArrayRef<const RegStorage> dp_temps(dp_temps_arr); RegLocation Arm64Mir2Lir::LocCReturn() { - return arm_loc_c_return; + return a64_loc_c_return; } RegLocation Arm64Mir2Lir::LocCReturnRef() { - return arm_loc_c_return_ref; + return a64_loc_c_return_ref; } RegLocation Arm64Mir2Lir::LocCReturnWide() { - return arm_loc_c_return_wide; + return a64_loc_c_return_wide; } RegLocation Arm64Mir2Lir::LocCReturnFloat() { - return arm_loc_c_return_float; + return a64_loc_c_return_float; } RegLocation Arm64Mir2Lir::LocCReturnDouble() { - return arm_loc_c_return_double; + return a64_loc_c_return_double; } // Return a target-dependent special register. @@ -153,7 +153,7 @@ ResourceMask Arm64Mir2Lir::GetRegMaskCommon(const RegStorage& reg) const { return ResourceMask::Bit( // FP register starts at bit position 32. - (reg.IsFloat() ? kArm64FPReg0 : 0) + reg.GetRegNum()); + (reg.IsFloat() ? kA64FPReg0 : 0) + reg.GetRegNum()); } ResourceMask Arm64Mir2Lir::GetPCUseDefEncoding() const { @@ -173,15 +173,15 @@ void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags, // These flags are somewhat uncommon - bypass if we can. if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LR)) != 0) { if (flags & REG_DEF_SP) { - def_mask->SetBit(kArm64RegSP); + def_mask->SetBit(kA64RegSP); } if (flags & REG_USE_SP) { - use_mask->SetBit(kArm64RegSP); + use_mask->SetBit(kA64RegSP); } if (flags & REG_DEF_LR) { - def_mask->SetBit(kArm64RegLR); + def_mask->SetBit(kA64RegLR); } } } @@ -408,7 +408,7 @@ std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned ch snprintf(tbuf, arraysize(tbuf), "d%d", operand & RegStorage::kRegNumMask); break; case 'f': - snprintf(tbuf, arraysize(tbuf), "%c%d", (IS_FWIDE(lir->opcode)) ? 'd' : 's', + snprintf(tbuf, arraysize(tbuf), "%c%d", (IS_WIDE(lir->opcode)) ? 
'd' : 's', operand & RegStorage::kRegNumMask); break; case 'l': { @@ -534,7 +534,7 @@ void Arm64Mir2Lir::DumpResourceMask(LIR* arm_lir, const ResourceMask& mask, cons char num[8]; int i; - for (i = 0; i < kArm64RegEnd; i++) { + for (i = 0; i < kA64RegEnd; i++) { if (mask.HasBit(i)) { snprintf(num, arraysize(num), "%d ", i); strcat(buf, num); @@ -602,14 +602,13 @@ Mir2Lir* Arm64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph } void Arm64Mir2Lir::CompilerInitializeRegAlloc() { - reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, core64_regs, sp_regs, dp_regs, - reserved_regs, reserved64_regs, core_temps, core64_temps, - sp_temps, dp_temps); + reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs, core64_regs, sp_regs, dp_regs, + reserved_regs, reserved64_regs, + core_temps, core64_temps, sp_temps, dp_temps)); // Target-specific adjustments. // Alias single precision float registers to corresponding double registers. - GrowableArray<RegisterInfo*>::Iterator fp_it(®_pool_->sp_regs_); - for (RegisterInfo* info = fp_it.Next(); info != nullptr; info = fp_it.Next()) { + for (RegisterInfo* info : reg_pool_->sp_regs_) { int fp_reg_num = info->GetReg().GetRegNum(); RegStorage dp_reg = RegStorage::FloatSolo64(fp_reg_num); RegisterInfo* dp_reg_info = GetRegInfo(dp_reg); @@ -622,8 +621,7 @@ void Arm64Mir2Lir::CompilerInitializeRegAlloc() { } // Alias 32bit W registers to corresponding 64bit X registers. - GrowableArray<RegisterInfo*>::Iterator w_it(®_pool_->core_regs_); - for (RegisterInfo* info = w_it.Next(); info != nullptr; info = w_it.Next()) { + for (RegisterInfo* info : reg_pool_->core_regs_) { int x_reg_num = info->GetReg().GetRegNum(); RegStorage x_reg = RegStorage::Solo64(x_reg_num); RegisterInfo* x_reg_info = GetRegInfo(x_reg); diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index 5326e74e16..38670ff8be 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -89,9 +89,9 @@ int32_t Arm64Mir2Lir::EncodeImmDouble(uint64_t bits) { size_t Arm64Mir2Lir::GetLoadStoreSize(LIR* lir) { bool opcode_is_wide = IS_WIDE(lir->opcode); - ArmOpcode opcode = UNWIDE(lir->opcode); + A64Opcode opcode = UNWIDE(lir->opcode); DCHECK(!IsPseudoLirOp(opcode)); - const ArmEncodingMap *encoder = &EncodingMap[opcode]; + const A64EncodingMap *encoder = &EncodingMap[opcode]; uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton; return (bits >> 30); } @@ -138,7 +138,7 @@ LIR* Arm64Mir2Lir::LoadFPConstantValueWide(RegStorage r_dest, int64_t value) { } else { int32_t encoded_imm = EncodeImmDouble(value); if (encoded_imm >= 0) { - return NewLIR2(FWIDE(kA64Fmov2fI), r_dest.GetReg(), encoded_imm); + return NewLIR2(WIDE(kA64Fmov2fI), r_dest.GetReg(), encoded_imm); } } @@ -151,7 +151,7 @@ LIR* Arm64Mir2Lir::LoadFPConstantValueWide(RegStorage r_dest, int64_t value) { } ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); - LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp), + LIR* load_pc_rel = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2fp), r_dest.GetReg(), 0, 0, 0, 0, data_target); AppendLIR(load_pc_rel); return load_pc_rel; @@ -415,7 +415,7 @@ LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) { // 1 instruction is enough to load the immediate. if (LIKELY(low_bits == high_bits)) { // Value is either 0 or -1: we can just use wzr. - ArmOpcode opcode = LIKELY(low_bits == 0) ? 
kA64Mov2rr : kA64Mvn2rr; + A64Opcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr; res = NewLIR2(opcode, r_dest.GetReg(), rwzr); } else { uint16_t uniform_bits, useful_bits; @@ -466,7 +466,7 @@ LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { if (LIKELY(value == INT64_C(0) || value == INT64_C(-1))) { // value is either 0 or -1: we can just use xzr. - ArmOpcode opcode = LIKELY(value == 0) ? WIDE(kA64Mov2rr) : WIDE(kA64Mvn2rr); + A64Opcode opcode = LIKELY(value == 0) ? WIDE(kA64Mov2rr) : WIDE(kA64Mvn2rr); return NewLIR2(opcode, r_dest.GetReg(), rxzr); } @@ -486,7 +486,7 @@ LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { if (num_slow_halfwords <= max_num_ops_per_const_load) { // We can encode the number using a movz/movn followed by one or more movk. - ArmOpcode op; + A64Opcode op; uint16_t background; LIR* res = nullptr; @@ -548,15 +548,11 @@ LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) { } LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) { - ArmOpcode opcode = kA64Brk1d; + A64Opcode opcode = kA64Brk1d; switch (op) { case kOpBlx: opcode = kA64Blr1x; break; - // TODO(Arm64): port kThumbBx. - // case kOpBx: - // opcode = kThumbBx; - // break; default: LOG(FATAL) << "Bad opcode " << op; } @@ -564,9 +560,9 @@ LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) { } LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) { - ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0); + A64Opcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0); CHECK_EQ(r_dest_src1.Is64Bit(), r_src2.Is64Bit()); - ArmOpcode opcode = kA64Brk1d; + A64Opcode opcode = kA64Brk1d; switch (op) { case kOpCmn: @@ -621,7 +617,7 @@ LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r DCHECK_EQ(shift, ENCODE_NO_SHIFT); return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg()); } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) { - ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind; + A64EncodingKind kind = EncodingMap[opcode].field_loc[2].kind; if (kind == kFmtShift) { return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift); } @@ -633,8 +629,8 @@ LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) { - ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0); - ArmOpcode opcode = kA64Brk1d; + A64Opcode wide = (r_dest_src1.Is64Bit()) ? 
WIDE(0) : UNWIDE(0); + A64Opcode opcode = kA64Brk1d; switch (op) { case kOpCmn: @@ -655,7 +651,7 @@ LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage DCHECK(!IsPseudoLirOp(opcode)); if (EncodingMap[opcode].flags & IS_TERTIARY_OP) { - ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind; + A64EncodingKind kind = EncodingMap[opcode].field_loc[2].kind; if (kind == kFmtExtend) { return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), EncodeExtend(ext, amount)); @@ -694,7 +690,7 @@ LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2, int shift) { - ArmOpcode opcode = kA64Brk1d; + A64Opcode opcode = kA64Brk1d; switch (op) { case kOpAdd: @@ -747,7 +743,7 @@ LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_s // The instructions above belong to two kinds: // - 4-operands instructions, where the last operand is a shift/extend immediate, // - 3-operands instructions with no shift/extend. - ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode; + A64Opcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode; CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit()); CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit()); if (EncodingMap[opcode].flags & IS_QUAD_OP) { @@ -762,7 +758,7 @@ LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_s LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) { - ArmOpcode opcode = kA64Brk1d; + A64Opcode opcode = kA64Brk1d; switch (op) { case kOpAdd: @@ -775,7 +771,7 @@ LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_ LOG(FATAL) << "Unimplemented opcode: " << op; break; } - ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode; + A64Opcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode; if (r_dest.Is64Bit()) { CHECK(r_src1.Is64Bit()); @@ -810,11 +806,11 @@ LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1 LIR* res; bool neg = (value < 0); int64_t abs_value = (neg) ? -value : value; - ArmOpcode opcode = kA64Brk1d; - ArmOpcode alt_opcode = kA64Brk1d; + A64Opcode opcode = kA64Brk1d; + A64Opcode alt_opcode = kA64Brk1d; bool is_logical = false; bool is_wide = r_dest.Is64Bit(); - ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0); + A64Opcode wide = (is_wide) ? WIDE(0) : UNWIDE(0); int info = 0; switch (op) { @@ -937,9 +933,9 @@ LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { } LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) { - ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0); - ArmOpcode opcode = kA64Brk1d; - ArmOpcode neg_opcode = kA64Brk1d; + A64Opcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0); + A64Opcode opcode = kA64Brk1d; + A64Opcode neg_opcode = kA64Brk1d; bool shift; bool neg = (value < 0); uint64_t abs_value = (neg) ? 
-value : value; @@ -1025,7 +1021,7 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto int scale, OpSize size) { LIR* load; int expected_scale = 0; - ArmOpcode opcode = kA64Brk1d; + A64Opcode opcode = kA64Brk1d; r_base = Check64BitReg(r_base); // TODO(Arm64): The sign extension of r_index should be carried out by using an extended @@ -1040,7 +1036,7 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto if (r_dest.IsDouble()) { DCHECK(size == k64 || size == kDouble); expected_scale = 3; - opcode = FWIDE(kA64Ldr4fXxG); + opcode = WIDE(kA64Ldr4fXxG); } else { DCHECK(r_dest.IsSingle()); DCHECK(size == k32 || size == kSingle); @@ -1113,7 +1109,7 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt int scale, OpSize size) { LIR* store; int expected_scale = 0; - ArmOpcode opcode = kA64Brk1d; + A64Opcode opcode = kA64Brk1d; r_base = Check64BitReg(r_base); // TODO(Arm64): The sign extension of r_index should be carried out by using an extended @@ -1128,7 +1124,7 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt if (r_src.IsDouble()) { DCHECK(size == k64 || size == kDouble); expected_scale = 3; - opcode = FWIDE(kA64Str4fXxG); + opcode = WIDE(kA64Str4fXxG); } else { DCHECK(r_src.IsSingle()); DCHECK(size == k32 || size == kSingle); @@ -1197,8 +1193,8 @@ LIR* Arm64Mir2Lir::StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegSto LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size) { LIR* load = NULL; - ArmOpcode opcode = kA64Brk1d; - ArmOpcode alt_opcode = kA64Brk1d; + A64Opcode opcode = kA64Brk1d; + A64Opcode alt_opcode = kA64Brk1d; int scale = 0; switch (size) { @@ -1209,8 +1205,8 @@ LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStor scale = 3; if (r_dest.IsFloat()) { DCHECK(r_dest.IsDouble()); - opcode = FWIDE(kA64Ldr3fXD); - alt_opcode = FWIDE(kA64Ldur3fXd); + opcode = WIDE(kA64Ldr3fXD); + alt_opcode = WIDE(kA64Ldur3fXd); } else { opcode = WIDE(kA64Ldr3rXD); alt_opcode = WIDE(kA64Ldur3rXd); @@ -1294,8 +1290,8 @@ LIR* Arm64Mir2Lir::LoadRefDisp(RegStorage r_base, int displacement, RegStorage r LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size) { LIR* store = NULL; - ArmOpcode opcode = kA64Brk1d; - ArmOpcode alt_opcode = kA64Brk1d; + A64Opcode opcode = kA64Brk1d; + A64Opcode alt_opcode = kA64Brk1d; int scale = 0; switch (size) { @@ -1306,11 +1302,11 @@ LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegSto scale = 3; if (r_src.IsFloat()) { DCHECK(r_src.IsDouble()); - opcode = FWIDE(kA64Str3fXD); - alt_opcode = FWIDE(kA64Stur3fXd); + opcode = WIDE(kA64Str3fXD); + alt_opcode = WIDE(kA64Stur3fXd); } else { - opcode = FWIDE(kA64Str3rXD); - alt_opcode = FWIDE(kA64Stur3rXd); + opcode = WIDE(kA64Str3rXD); + alt_opcode = WIDE(kA64Stur3rXd); } break; case kSingle: // Intentional fall-through. 
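Note on the arm64 hunks above: they collapse the float-only FWIDE()/FUNWIDE() opcode modifiers into the single WIDE()/UNWIDE() pair and rename ArmOpcode/ArmEncodingMap/ArmEncodingKind to A64Opcode/A64EncodingMap/A64EncodingKind, so core and FP instructions share one wide-variant mechanism. The stand-alone sketch below illustrates the flag-OR idiom such a modifier relies on; the enumerator values and the kA64Wide constant are placeholders for illustration and are not the actual definitions in arm64_lir.h (where these are macros).

    #include <cstdint>

    // Placeholder opcode values; the real enumerators live in arm64_lir.h.
    enum A64Opcode : int32_t {
      kA64Fmov2ff = 0x10,
      kA64Ldr3fXD = 0x20,
      kA64Last    = 0x100,
    };

    // Assumed wide-modifier bit, ORed into an opcode to select its 64-bit (x/d) variant.
    constexpr int32_t kA64Wide = 0x1000;

    constexpr int32_t WIDE(int32_t op)   { return op | kA64Wide; }    // request the 64-bit form
    constexpr int32_t UNWIDE(int32_t op) { return op & ~kA64Wide; }   // strip the modifier
    constexpr bool IS_WIDE(int32_t op)   { return (op & kA64Wide) != 0; }

    // With one modifier covering both core and FP opcodes, callers no longer need a
    // separate FWIDE()/FUNWIDE() pair for floating-point instructions:
    //   NewLIR2(WIDE(kA64Fmov2ff), dst, src);  // double-precision fmov
    //   NewLIR2(kA64Fmov2ff, dst, src);        // single-precision fmov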
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index d90bce119a..f30501749a 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -319,7 +319,7 @@ void Mir2Lir::CodegenDump() { LOG(INFO) << "Dumping LIR insns for " << PrettyMethod(cu_->method_idx, *cu_->dex_file); LIR* lir_insn; - int insns_size = cu_->code_item->insns_size_in_code_units_; + int insns_size = mir_graph_->GetNumDalvikInsns(); LOG(INFO) << "Regs (excluding ins) : " << mir_graph_->GetNumOfLocalCodeVRs(); LOG(INFO) << "Ins : " << mir_graph_->GetNumOfInVRs(); @@ -472,20 +472,15 @@ void Mir2Lir::InstallLiteralPools() { Push32(code_buffer_, data_lir->operands[0]); data_lir = NEXT_LIR(data_lir); } + // TODO: patches_.reserve() as needed. // Push code and method literals, record offsets for the compiler to patch. data_lir = code_literal_list_; while (data_lir != NULL) { uint32_t target_method_idx = data_lir->operands[0]; const DexFile* target_dex_file = reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1])); - cu_->compiler_driver->AddCodePatch(cu_->dex_file, - cu_->class_def_idx, - cu_->method_idx, - cu_->invoke_type, - target_method_idx, - target_dex_file, - static_cast<InvokeType>(data_lir->operands[2]), - code_buffer_.size()); + patches_.push_back(LinkerPatch::CodePatch(code_buffer_.size(), + target_dex_file, target_method_idx)); const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx); // unique value based on target to ensure code deduplication works PushPointer(code_buffer_, &target_method_id, cu_->target64); @@ -496,14 +491,8 @@ void Mir2Lir::InstallLiteralPools() { uint32_t target_method_idx = data_lir->operands[0]; const DexFile* target_dex_file = reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1])); - cu_->compiler_driver->AddMethodPatch(cu_->dex_file, - cu_->class_def_idx, - cu_->method_idx, - cu_->invoke_type, - target_method_idx, - target_dex_file, - static_cast<InvokeType>(data_lir->operands[2]), - code_buffer_.size()); + patches_.push_back(LinkerPatch::MethodPatch(code_buffer_.size(), + target_dex_file, target_method_idx)); const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx); // unique value based on target to ensure code deduplication works PushPointer(code_buffer_, &target_method_id, cu_->target64); @@ -512,16 +501,12 @@ void Mir2Lir::InstallLiteralPools() { // Push class literals. 
data_lir = class_literal_list_; while (data_lir != NULL) { - uint32_t target_method_idx = data_lir->operands[0]; + uint32_t target_type_idx = data_lir->operands[0]; const DexFile* class_dex_file = reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1])); - cu_->compiler_driver->AddClassPatch(cu_->dex_file, - cu_->class_def_idx, - cu_->method_idx, - target_method_idx, - class_dex_file, - code_buffer_.size()); - const DexFile::TypeId& target_method_id = class_dex_file->GetTypeId(target_method_idx); + patches_.push_back(LinkerPatch::TypePatch(code_buffer_.size(), + class_dex_file, target_type_idx)); + const DexFile::TypeId& target_method_id = class_dex_file->GetTypeId(target_type_idx); // unique value based on target to ensure code deduplication works PushPointer(code_buffer_, &target_method_id, cu_->target64); data_lir = NEXT_LIR(data_lir); @@ -530,10 +515,7 @@ void Mir2Lir::InstallLiteralPools() { /* Write the switch tables to the output stream */ void Mir2Lir::InstallSwitchTables() { - GrowableArray<SwitchTable*>::Iterator iterator(&switch_tables_); - while (true) { - Mir2Lir::SwitchTable* tab_rec = iterator.Next(); - if (tab_rec == NULL) break; + for (Mir2Lir::SwitchTable* tab_rec : switch_tables_) { AlignBuffer(code_buffer_, tab_rec->offset); /* * For Arm, our reference point is the address of the bx @@ -590,10 +572,7 @@ void Mir2Lir::InstallSwitchTables() { /* Write the fill array dta to the output stream */ void Mir2Lir::InstallFillArrayData() { - GrowableArray<FillArrayData*>::Iterator iterator(&fill_array_data_); - while (true) { - Mir2Lir::FillArrayData *tab_rec = iterator.Next(); - if (tab_rec == NULL) break; + for (Mir2Lir::FillArrayData* tab_rec : fill_array_data_) { AlignBuffer(code_buffer_, tab_rec->offset); for (int i = 0; i < (tab_rec->size + 1) / 2; i++) { code_buffer_.push_back(tab_rec->table[i] & 0xFF); @@ -801,10 +780,7 @@ int Mir2Lir::AssignLiteralOffset(CodeOffset offset) { } int Mir2Lir::AssignSwitchTablesOffset(CodeOffset offset) { - GrowableArray<SwitchTable*>::Iterator iterator(&switch_tables_); - while (true) { - Mir2Lir::SwitchTable* tab_rec = iterator.Next(); - if (tab_rec == NULL) break; + for (Mir2Lir::SwitchTable* tab_rec : switch_tables_) { tab_rec->offset = offset; if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) { offset += tab_rec->table[1] * (sizeof(int) * 2); @@ -818,15 +794,12 @@ int Mir2Lir::AssignSwitchTablesOffset(CodeOffset offset) { } int Mir2Lir::AssignFillArrayDataOffset(CodeOffset offset) { - GrowableArray<FillArrayData*>::Iterator iterator(&fill_array_data_); - while (true) { - Mir2Lir::FillArrayData *tab_rec = iterator.Next(); - if (tab_rec == NULL) break; + for (Mir2Lir::FillArrayData* tab_rec : fill_array_data_) { tab_rec->offset = offset; offset += tab_rec->size; // word align offset = RoundUp(offset, 4); - } + } return offset; } @@ -878,10 +851,7 @@ void Mir2Lir::MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec) { } void Mir2Lir::ProcessSwitchTables() { - GrowableArray<SwitchTable*>::Iterator iterator(&switch_tables_); - while (true) { - Mir2Lir::SwitchTable *tab_rec = iterator.Next(); - if (tab_rec == NULL) break; + for (Mir2Lir::SwitchTable* tab_rec : switch_tables_) { if (tab_rec->table[0] == Instruction::kPackedSwitchSignature) { MarkPackedCaseLabels(tab_rec); } else if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) { @@ -1006,21 +976,22 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena first_fixup_(NULL), cu_(cu), mir_graph_(mir_graph), - switch_tables_(arena, 
4, kGrowableArraySwitchTables), - fill_array_data_(arena, 4, kGrowableArrayFillArrayData), - tempreg_info_(arena, 20, kGrowableArrayMisc), - reginfo_map_(arena, RegStorage::kMaxRegs, kGrowableArrayMisc), - pointer_storage_(arena, 128, kGrowableArrayMisc), + switch_tables_(arena->Adapter(kArenaAllocSwitchTable)), + fill_array_data_(arena->Adapter(kArenaAllocFillArrayData)), + tempreg_info_(arena->Adapter()), + reginfo_map_(arena->Adapter()), + pointer_storage_(arena->Adapter()), data_offset_(0), total_size_(0), block_label_list_(NULL), promotion_map_(NULL), current_dalvik_offset_(0), estimated_native_code_size_(0), - reg_pool_(NULL), + reg_pool_(nullptr), live_sreg_(0), core_vmap_table_(mir_graph->GetArena()->Adapter()), fp_vmap_table_(mir_graph->GetArena()->Adapter()), + patches_(mir_graph->GetArena()->Adapter()), num_core_spills_(0), num_fp_spills_(0), frame_size_(0), @@ -1028,9 +999,15 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena fp_spill_mask_(0), first_lir_insn_(NULL), last_lir_insn_(NULL), - slow_paths_(arena, 32, kGrowableArraySlowPaths), + slow_paths_(arena->Adapter(kArenaAllocSlowPaths)), mem_ref_type_(ResourceMask::kHeapRef), mask_cache_(arena) { + switch_tables_.reserve(4); + fill_array_data_.reserve(4); + tempreg_info_.reserve(20); + reginfo_map_.reserve(RegStorage::kMaxRegs); + pointer_storage_.reserve(128); + slow_paths_.reserve(32); // Reserve pointer id 0 for NULL. size_t null_idx = WrapPointer(NULL); DCHECK_EQ(null_idx, 0U); @@ -1106,11 +1083,17 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { vmap_encoder.PushBackUnsigned(0u); // Size is 0. } + // Sort patches by literal offset for better deduplication. + std::sort(patches_.begin(), patches_.end(), [](const LinkerPatch& lhs, const LinkerPatch& rhs) { + return lhs.LiteralOffset() < rhs.LiteralOffset(); + }); + std::unique_ptr<std::vector<uint8_t>> cfi_info(ReturnFrameDescriptionEntry()); CompiledMethod* result = new CompiledMethod(cu_->compiler_driver, cu_->instruction_set, code_buffer_, frame_size_, core_spill_mask_, fp_spill_mask_, &src_mapping_table_, encoded_mapping_table_, - vmap_encoder.GetData(), native_gc_map_, cfi_info.get()); + vmap_encoder.GetData(), native_gc_map_, cfi_info.get(), + ArrayRef<LinkerPatch>(patches_)); return result; } @@ -1223,7 +1206,7 @@ LIR *Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStor } void Mir2Lir::AddSlowPath(LIRSlowPath* slowpath) { - slow_paths_.Insert(slowpath); + slow_paths_.push_back(slowpath); ResetDefTracking(); } diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index fbe710bebd..9f7a8813c0 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -757,11 +757,10 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, OpSize size, Primitive::Typ void Mir2Lir::HandleSlowPaths() { // We should check slow_paths_.Size() every time, because a new slow path // may be created during slowpath->Compile(). - for (size_t i = 0; i < slow_paths_.Size(); ++i) { - LIRSlowPath* slowpath = slow_paths_.Get(i); + for (LIRSlowPath* slowpath : slow_paths_) { slowpath->Compile(); } - slow_paths_.Reset(); + slow_paths_.clear(); } void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size, Primitive::Type type, diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 960f21790b..67a75cbd62 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "arm/codegen_arm.h" #include "dex/compiler_ir.h" #include "dex/frontend.h" #include "dex/quick/dex_file_method_inliner.h" @@ -27,7 +28,7 @@ #include "mirror/object_array-inl.h" #include "mirror/string.h" #include "mir_to_lir-inl.h" -#include "x86/codegen_x86.h" +#include "scoped_thread_state_change.h" namespace art { @@ -493,15 +494,15 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, uint32_t unused, uintptr_t direct_code, uintptr_t direct_method, InvokeType type) { + DCHECK(cu->instruction_set != kX86 && cu->instruction_set != kX86_64 && + cu->instruction_set != kThumb2 && cu->instruction_set != kArm); Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); if (direct_code != 0 && direct_method != 0) { switch (state) { case 0: // Get the current Method* [sets kArg0] if (direct_code != static_cast<uintptr_t>(-1)) { - if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) { - cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); - } - } else if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) { + cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + } else { cg->LoadCodeAddress(target_method, type, kInvokeTgt); } if (direct_method != static_cast<uintptr_t>(-1)) { @@ -529,7 +530,7 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, if (direct_code != 0) { if (direct_code != static_cast<uintptr_t>(-1)) { cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); - } else if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) { + } else { CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds()); cg->LoadCodeAddress(target_method, type, kInvokeTgt); } @@ -547,7 +548,7 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, if (CommonCallCodeLoadCodePointerIntoInvokeTgt(info, &arg0_ref, cu, cg)) { break; // kInvokeTgt := arg0_ref->entrypoint } - } else if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) { + } else { break; } // Intentional fallthrough for x86 @@ -1683,31 +1684,6 @@ void Mir2Lir::GenInvoke(CallInfo* info) { GenInvokeNoInline(info); } -static LIR* GenInvokeNoInlineCall(Mir2Lir* mir_to_lir, InvokeType type) { - QuickEntrypointEnum trampoline; - switch (type) { - case kInterface: - trampoline = kQuickInvokeInterfaceTrampolineWithAccessCheck; - break; - case kDirect: - trampoline = kQuickInvokeDirectTrampolineWithAccessCheck; - break; - case kStatic: - trampoline = kQuickInvokeStaticTrampolineWithAccessCheck; - break; - case kSuper: - trampoline = kQuickInvokeSuperTrampolineWithAccessCheck; - break; - case kVirtual: - trampoline = kQuickInvokeVirtualTrampolineWithAccessCheck; - break; - default: - LOG(FATAL) << "Unexpected invoke type"; - trampoline = kQuickInvokeInterfaceTrampolineWithAccessCheck; - } - return mir_to_lir->InvokeTrampoline(kOpBlx, RegStorage::InvalidReg(), trampoline); -} - void Mir2Lir::GenInvokeNoInline(CallInfo* info) { int call_state = 0; LIR* null_ck; @@ -1721,7 +1697,7 @@ void Mir2Lir::GenInvokeNoInline(CallInfo* info) { cu_->compiler_driver->ProcessedInvoke(method_info.GetInvokeType(), method_info.StatsFlags()); BeginInvoke(info); InvokeType original_type = static_cast<InvokeType>(method_info.GetInvokeType()); - info->type = static_cast<InvokeType>(method_info.GetSharpType()); + info->type = method_info.GetSharpType(); bool fast_path = method_info.FastPath(); bool skip_this; if (info->type == kInterface) { @@ -1731,10 +1707,10 @@ void Mir2Lir::GenInvokeNoInline(CallInfo* info) { if (fast_path) { p_null_ck = &null_ck; 
} - next_call_insn = fast_path ? NextSDCallInsn : NextDirectCallInsnSP; + next_call_insn = fast_path ? GetNextSDCallInsn() : NextDirectCallInsnSP; skip_this = false; } else if (info->type == kStatic) { - next_call_insn = fast_path ? NextSDCallInsn : NextStaticCallInsnSP; + next_call_insn = fast_path ? GetNextSDCallInsn() : NextStaticCallInsnSP; skip_this = false; } else if (info->type == kSuper) { DCHECK(!fast_path); // Fast path is a direct call. @@ -1762,25 +1738,9 @@ void Mir2Lir::GenInvokeNoInline(CallInfo* info) { call_state = next_call_insn(cu_, info, call_state, target_method, method_info.VTableIndex(), method_info.DirectCode(), method_info.DirectMethod(), original_type); } - LIR* call_inst; - if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) { - call_inst = OpReg(kOpBlx, TargetPtrReg(kInvokeTgt)); - } else { - if (fast_path) { - if (method_info.DirectCode() == static_cast<uintptr_t>(-1)) { - // We can have the linker fixup a call relative. - call_inst = - reinterpret_cast<X86Mir2Lir*>(this)->CallWithLinkerFixup(target_method, info->type); - } else { - call_inst = OpMem(kOpBlx, TargetReg(kArg0, kRef), - mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()); - } - } else { - call_inst = GenInvokeNoInlineCall(this, info->type); - } - } + LIR* call_insn = GenCallInsn(method_info); EndInvoke(info); - MarkSafepointPC(call_inst); + MarkSafepointPC(call_insn); ClobberCallerSave(); if (info->result.location != kLocInvalid) { @@ -1795,4 +1755,14 @@ void Mir2Lir::GenInvokeNoInline(CallInfo* info) { } } +NextCallInsn Mir2Lir::GetNextSDCallInsn() { + return NextSDCallInsn; +} + +LIR* Mir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info) { + DCHECK(cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64 && + cu_->instruction_set != kThumb2 && cu_->instruction_set != kArm); + return OpReg(kOpBlx, TargetPtrReg(kInvokeTgt)); +} + } // namespace art diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index f3edd7eecd..6536c413e8 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -74,7 +74,7 @@ void MipsMir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLoca int elements = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(elements * sizeof(LIR*), kArenaAllocLIR)); - switch_tables_.Insert(tab_rec); + switch_tables_.push_back(tab_rec); // The table is composed of 8-byte key/disp pairs int byte_size = elements * 8; @@ -151,7 +151,7 @@ void MipsMir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLoca int size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); - switch_tables_.Insert(tab_rec); + switch_tables_.push_back(tab_rec); // Get the switch value rl_src = LoadValue(rl_src, kCoreReg); @@ -232,7 +232,7 @@ void MipsMir2Lir::GenFillArrayData(MIR* mir, DexOffset table_offset, RegLocation uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16); tab_rec->size = (size * width) + 8; - fill_array_data_.Insert(tab_rec); + fill_array_data_.push_back(tab_rec); // Making a call - use explicit registers FlushAllRegs(); /* Everything to home location */ diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index 9c4426f746..d3719ab312 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -430,16 +430,16 @@ bool MipsMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { } 
void MipsMir2Lir::CompilerInitializeRegAlloc() { - reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, empty_pool /* core64 */, sp_regs, - dp_regs, reserved_regs, empty_pool /* reserved64 */, - core_temps, empty_pool /* core64_temps */, sp_temps, - dp_temps); + reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs, empty_pool /* core64 */, + sp_regs, dp_regs, + reserved_regs, empty_pool /* reserved64 */, + core_temps, empty_pool /* core64_temps */, + sp_temps, dp_temps)); // Target-specific adjustments. // Alias single precision floats to appropriate half of overlapping double. - GrowableArray<RegisterInfo*>::Iterator it(®_pool_->sp_regs_); - for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { + for (RegisterInfo* info : reg_pool_->sp_regs_) { int sp_reg_num = info->GetReg().GetRegNum(); #if (FR_BIT == 0) int dp_reg_num = sp_reg_num & ~1; diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h index 22588f3e8c..0aefc2dea8 100644 --- a/compiler/dex/quick/mir_to_lir-inl.h +++ b/compiler/dex/quick/mir_to_lir-inl.h @@ -142,8 +142,9 @@ inline LIR* Mir2Lir::NewLIR5(int opcode, int dest, int src1, int src2, int info1 */ inline void Mir2Lir::SetupRegMask(ResourceMask* mask, int reg) { DCHECK_EQ((reg & ~RegStorage::kRegValMask), 0); - DCHECK(reginfo_map_.Get(reg) != nullptr) << "No info for 0x" << reg; - *mask = mask->Union(reginfo_map_.Get(reg)->DefUseMask()); + DCHECK_LT(static_cast<size_t>(reg), reginfo_map_.size()); + DCHECK(reginfo_map_[reg] != nullptr) << "No info for 0x" << reg; + *mask = mask->Union(reginfo_map_[reg]->DefUseMask()); } /* @@ -151,8 +152,9 @@ inline void Mir2Lir::SetupRegMask(ResourceMask* mask, int reg) { */ inline void Mir2Lir::ClearRegMask(ResourceMask* mask, int reg) { DCHECK_EQ((reg & ~RegStorage::kRegValMask), 0); - DCHECK(reginfo_map_.Get(reg) != nullptr) << "No info for 0x" << reg; - *mask = mask->ClearBits(reginfo_map_.Get(reg)->DefUseMask()); + DCHECK_LT(static_cast<size_t>(reg), reginfo_map_.size()); + DCHECK(reginfo_map_[reg] != nullptr) << "No info for 0x" << reg; + *mask = mask->ClearBits(reginfo_map_[reg]->DefUseMask()); } /* @@ -256,8 +258,7 @@ inline void Mir2Lir::SetupResourceMasks(LIR* lir) { } inline art::Mir2Lir::RegisterInfo* Mir2Lir::GetRegInfo(RegStorage reg) { - RegisterInfo* res = reg.IsPair() ? reginfo_map_.Get(reg.GetLowReg()) : - reginfo_map_.Get(reg.GetReg()); + RegisterInfo* res = reg.IsPair() ? reginfo_map_[reg.GetLowReg()] : reginfo_map_[reg.GetReg()]; DCHECK(res != nullptr); return res; } diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 96f00e791c..6942c0fb11 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -1268,13 +1268,12 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { bool Mir2Lir::SpecialMIR2LIR(const InlineMethod& special) { cu_->NewTimingSplit("SpecialMIR2LIR"); // Find the first DalvikByteCode block. - int num_reachable_blocks = mir_graph_->GetNumReachableBlocks(); + DCHECK_EQ(mir_graph_->GetNumReachableBlocks(), mir_graph_->GetDfsOrder().size()); BasicBlock*bb = NULL; - for (int idx = 0; idx < num_reachable_blocks; idx++) { - // TODO: no direct access of growable lists. 
- int dfs_index = mir_graph_->GetDfsOrder()->Get(idx); - bb = mir_graph_->GetBasicBlock(dfs_index); - if (bb->block_type == kDalvikByteCode) { + for (BasicBlockId dfs_id : mir_graph_->GetDfsOrder()) { + BasicBlock* candidate = mir_graph_->GetBasicBlock(dfs_id); + if (candidate->block_type == kDalvikByteCode) { + bb = candidate; break; } } diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index ea722ab1df..67a8c0f400 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -33,7 +33,6 @@ #include "utils/array_ref.h" #include "utils/arena_allocator.h" #include "utils/arena_containers.h" -#include "utils/growable_array.h" #include "utils/stack_checks.h" namespace art { @@ -139,6 +138,7 @@ struct LIR; struct RegisterInfo; class DexFileMethodInliner; class MIRGraph; +class MirMethodLoweringInfo; class Mir2Lir; typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int, @@ -437,20 +437,21 @@ class Mir2Lir : public Backend { static void* operator new(size_t size, ArenaAllocator* arena) { return arena->Alloc(size, kArenaAllocRegAlloc); } + static void operator delete(void* ptr) { UNUSED(ptr); } void ResetNextTemp() { next_core_reg_ = 0; next_sp_reg_ = 0; next_dp_reg_ = 0; } - GrowableArray<RegisterInfo*> core_regs_; + ArenaVector<RegisterInfo*> core_regs_; int next_core_reg_; - GrowableArray<RegisterInfo*> core64_regs_; + ArenaVector<RegisterInfo*> core64_regs_; int next_core64_reg_; - GrowableArray<RegisterInfo*> sp_regs_; // Single precision float. + ArenaVector<RegisterInfo*> sp_regs_; // Single precision float. int next_sp_reg_; - GrowableArray<RegisterInfo*> dp_regs_; // Double precision float. + ArenaVector<RegisterInfo*> dp_regs_; // Double precision float. int next_dp_reg_; - GrowableArray<RegisterInfo*>* ref_regs_; // Points to core_regs_ or core64_regs_ + ArenaVector<RegisterInfo*>* ref_regs_; // Points to core_regs_ or core64_regs_ int* next_ref_reg_; private: @@ -597,13 +598,13 @@ class Mir2Lir : public Backend { * may be worth conditionally-compiling a set of identity functions here. */ uint32_t WrapPointer(void* pointer) { - uint32_t res = pointer_storage_.Size(); - pointer_storage_.Insert(pointer); + uint32_t res = pointer_storage_.size(); + pointer_storage_.push_back(pointer); return res; } void* UnwrapPointer(size_t index) { - return pointer_storage_.Get(index); + return pointer_storage_[index]; } // strdup(), but allocates from the arena. 
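Note on the container changes above (codegen_util.cc, gen_common.cc, mir_to_lir-inl.h, mir_to_lir.h, and continued below in ralloc_util.cc): they apply one mechanical migration from GrowableArray<T> to ArenaVector<T>, so Insert/Get/Size and explicit Iterator objects give way to push_back/operator[]/size() and range-based for, with reserve() replacing the old initial-capacity constructor argument. A minimal stand-alone sketch of the idiom, using plain std::vector as a stand-in for ART's arena-backed ArenaVector (which is std::vector with an arena allocator adapter):

    #include <vector>

    // Stand-in only: ART's ArenaVector<T> is std::vector<T, ArenaAllocatorAdapter<T>>.
    template <typename T>
    using ArenaVector = std::vector<T>;

    struct SwitchTable { int offset = 0; };

    void MigratedUsage(ArenaVector<SwitchTable*>* switch_tables, SwitchTable* tab_rec) {
      switch_tables->reserve(4);                 // was the GrowableArray initial-capacity argument
      switch_tables->push_back(tab_rec);         // was switch_tables_.Insert(tab_rec)
      for (SwitchTable* rec : *switch_tables) {  // was GrowableArray<...>::Iterator + Next()
        rec->offset += 4;
      }
      SwitchTable* first = (*switch_tables)[0];  // was switch_tables_.Get(0)
      (void)first;
    }

    int main() {
      ArenaVector<SwitchTable*> tables;
      SwitchTable rec;
      MigratedUsage(&tables, &rec);
      return 0;
    }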
@@ -713,7 +714,7 @@ class Mir2Lir : public Backend { void SimpleRegAlloc(); void ResetRegPool(); void CompilerInitPool(RegisterInfo* info, RegStorage* regs, int num); - void DumpRegPool(GrowableArray<RegisterInfo*>* regs); + void DumpRegPool(ArenaVector<RegisterInfo*>* regs); void DumpCoreRegPool(); void DumpFpRegPool(); void DumpRegPools(); @@ -728,7 +729,7 @@ class Mir2Lir : public Backend { RegStorage AllocPreservedFpReg(int s_reg); virtual RegStorage AllocPreservedSingle(int s_reg); virtual RegStorage AllocPreservedDouble(int s_reg); - RegStorage AllocTempBody(GrowableArray<RegisterInfo*> ®s, int* next_temp, bool required); + RegStorage AllocTempBody(ArenaVector<RegisterInfo*>& regs, int* next_temp, bool required); virtual RegStorage AllocTemp(bool required = true); virtual RegStorage AllocTempWide(bool required = true); virtual RegStorage AllocTempRef(bool required = true); @@ -739,7 +740,7 @@ class Mir2Lir : public Backend { void FlushReg(RegStorage reg); void FlushRegWide(RegStorage reg); RegStorage AllocLiveReg(int s_reg, int reg_class, bool wide); - RegStorage FindLiveReg(GrowableArray<RegisterInfo*> ®s, int s_reg); + RegStorage FindLiveReg(ArenaVector<RegisterInfo*>& regs, int s_reg); virtual void FreeTemp(RegStorage reg); virtual void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free); virtual bool IsLive(RegStorage reg); @@ -909,6 +910,15 @@ class Mir2Lir : public Backend { bool safepoint_pc); void GenInvoke(CallInfo* info); void GenInvokeNoInline(CallInfo* info); + virtual NextCallInsn GetNextSDCallInsn(); + + /* + * @brief Generate the actual call insn based on the method info. + * @param method_info the lowering info for the method call. + * @returns Call instruction + */ + virtual LIR* GenCallInsn(const MirMethodLoweringInfo& method_info); + virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method); virtual int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, @@ -1676,11 +1686,11 @@ class Mir2Lir : public Backend { protected: CompilationUnit* const cu_; MIRGraph* const mir_graph_; - GrowableArray<SwitchTable*> switch_tables_; - GrowableArray<FillArrayData*> fill_array_data_; - GrowableArray<RegisterInfo*> tempreg_info_; - GrowableArray<RegisterInfo*> reginfo_map_; - GrowableArray<void*> pointer_storage_; + ArenaVector<SwitchTable*> switch_tables_; + ArenaVector<FillArrayData*> fill_array_data_; + ArenaVector<RegisterInfo*> tempreg_info_; + ArenaVector<RegisterInfo*> reginfo_map_; + ArenaVector<void*> pointer_storage_; CodeOffset current_code_offset_; // Working byte offset of machine instructons. CodeOffset data_offset_; // starting offset of literal pool. size_t total_size_; // header + code size. @@ -1697,7 +1707,7 @@ class Mir2Lir : public Backend { */ DexOffset current_dalvik_offset_; size_t estimated_native_code_size_; // Just an estimate; used to reserve code_buffer_ size. - RegisterPool* reg_pool_; + std::unique_ptr<RegisterPool> reg_pool_; /* * Sanity checking for the register temp tracking. 
The same ssa * name should never be associated with one temp register per @@ -1712,6 +1722,7 @@ class Mir2Lir : public Backend { ArenaVector<uint32_t> core_vmap_table_; ArenaVector<uint32_t> fp_vmap_table_; std::vector<uint8_t> native_gc_map_; + ArenaVector<LinkerPatch> patches_; int num_core_spills_; int num_fp_spills_; int frame_size_; @@ -1720,7 +1731,7 @@ class Mir2Lir : public Backend { LIR* first_lir_insn_; LIR* last_lir_insn_; - GrowableArray<LIRSlowPath*> slow_paths_; + ArenaVector<LIRSlowPath*> slow_paths_; // The memory reference type for new LIRs. // NOTE: Passing this as an explicit parameter by all functions that directly or indirectly diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index 2c5f79c29a..6f2a647313 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -489,7 +489,7 @@ static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) } } return true; -}; +} // Skip the method that we do not support currently. bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 195da0dad2..6305b22ded 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -28,8 +28,7 @@ namespace art { * live until it is either explicitly killed or reallocated. */ void Mir2Lir::ResetRegPool() { - GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_); - for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) { + for (RegisterInfo* info : tempreg_info_) { info->MarkFree(); } // Reset temp tracking sanity check. @@ -66,41 +65,38 @@ Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena, const ArrayRef<const RegStorage>& core64_temps, const ArrayRef<const RegStorage>& sp_temps, const ArrayRef<const RegStorage>& dp_temps) : - core_regs_(arena, core_regs.size()), next_core_reg_(0), - core64_regs_(arena, core64_regs.size()), next_core64_reg_(0), - sp_regs_(arena, sp_regs.size()), next_sp_reg_(0), - dp_regs_(arena, dp_regs.size()), next_dp_reg_(0), m2l_(m2l) { + core_regs_(arena->Adapter()), next_core_reg_(0), + core64_regs_(arena->Adapter()), next_core64_reg_(0), + sp_regs_(arena->Adapter()), next_sp_reg_(0), + dp_regs_(arena->Adapter()), next_dp_reg_(0), m2l_(m2l) { // Initialize the fast lookup map. - m2l_->reginfo_map_.Reset(); - if (kIsDebugBuild) { - m2l_->reginfo_map_.Resize(RegStorage::kMaxRegs); - for (unsigned i = 0; i < RegStorage::kMaxRegs; i++) { - m2l_->reginfo_map_.Insert(nullptr); - } - } else { - m2l_->reginfo_map_.SetSize(RegStorage::kMaxRegs); - } + m2l_->reginfo_map_.clear(); + m2l_->reginfo_map_.resize(RegStorage::kMaxRegs, nullptr); // Construct the register pool. 
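The constructor initializers now read core_regs_(arena->Adapter()) and so on: the arena hands the vector an STL-style allocator adapter, so ArenaVector draws its storage from the compilation arena and nothing is freed element by element. A toy sketch of that mechanism, assuming a simple bump arena; ART's real ArenaAllocator and Adapter() are more involved:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Toy bump-pointer arena: memory is handed out linearly and only released
    // when the whole arena goes away, like a compilation-scoped allocator.
    class Arena {
     public:
      explicit Arena(std::size_t capacity) : buffer_(capacity), used_(0) {}

      void* Alloc(std::size_t bytes) {
        bytes = (bytes + 15) & ~static_cast<std::size_t>(15);  // Keep 16-byte alignment.
        void* result = buffer_.data() + used_;
        used_ += bytes;  // No capacity check; this is only a sketch.
        return result;
      }

     private:
      std::vector<uint8_t> buffer_;
      std::size_t used_;
    };

    // STL-compatible allocator that forwards to the arena; deallocate is a no-op
    // because everything is reclaimed in bulk, which is what Adapter() provides.
    template <typename T>
    class ArenaAdapter {
     public:
      using value_type = T;

      explicit ArenaAdapter(Arena* arena) : arena_(arena) {}
      template <typename U>
      ArenaAdapter(const ArenaAdapter<U>& other) : arena_(other.arena_) {}

      T* allocate(std::size_t n) { return static_cast<T*>(arena_->Alloc(n * sizeof(T))); }
      void deallocate(T*, std::size_t) {}

      Arena* arena_;
    };

    template <typename T, typename U>
    bool operator==(const ArenaAdapter<T>& a, const ArenaAdapter<U>& b) { return a.arena_ == b.arena_; }
    template <typename T, typename U>
    bool operator!=(const ArenaAdapter<T>& a, const ArenaAdapter<U>& b) { return !(a == b); }

    template <typename T>
    using ArenaVector = std::vector<T, ArenaAdapter<T>>;

    int main() {
      Arena arena(1 << 16);
      ArenaVector<int> regs(ArenaAdapter<int>(&arena));
      regs.reserve(8);  // Mirrors core_regs_.reserve(core_regs.size()).
      for (int i = 0; i < 8; ++i) regs.push_back(i);
      return regs.back() == 7 ? 0 : 1;
    }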
+ core_regs_.reserve(core_regs.size()); for (const RegStorage& reg : core_regs) { RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg)); - m2l_->reginfo_map_.Put(reg.GetReg(), info); - core_regs_.Insert(info); + m2l_->reginfo_map_[reg.GetReg()] = info; + core_regs_.push_back(info); } + core64_regs_.reserve(core64_regs.size()); for (const RegStorage& reg : core64_regs) { RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg)); - m2l_->reginfo_map_.Put(reg.GetReg(), info); - core64_regs_.Insert(info); + m2l_->reginfo_map_[reg.GetReg()] = info; + core64_regs_.push_back(info); } + sp_regs_.reserve(sp_regs.size()); for (const RegStorage& reg : sp_regs) { RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg)); - m2l_->reginfo_map_.Put(reg.GetReg(), info); - sp_regs_.Insert(info); + m2l_->reginfo_map_[reg.GetReg()] = info; + sp_regs_.push_back(info); } + dp_regs_.reserve(dp_regs.size()); for (const RegStorage& reg : dp_regs) { RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg)); - m2l_->reginfo_map_.Put(reg.GetReg(), info); - dp_regs_.Insert(info); + m2l_->reginfo_map_[reg.GetReg()] = info; + dp_regs_.push_back(info); } // Keep special registers from being allocated. @@ -127,10 +123,10 @@ Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena, // Add an entry for InvalidReg with zero'd mask. RegisterInfo* invalid_reg = new (arena) RegisterInfo(RegStorage::InvalidReg(), kEncodeNone); - m2l_->reginfo_map_.Put(RegStorage::InvalidReg().GetReg(), invalid_reg); + m2l_->reginfo_map_[RegStorage::InvalidReg().GetReg()] = invalid_reg; // Existence of core64 registers implies wide references. - if (core64_regs_.Size() != 0) { + if (core64_regs_.size() != 0) { ref_regs_ = &core64_regs_; next_ref_reg_ = &next_core64_reg_; } else { @@ -139,10 +135,9 @@ Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena, } } -void Mir2Lir::DumpRegPool(GrowableArray<RegisterInfo*>* regs) { +void Mir2Lir::DumpRegPool(ArenaVector<RegisterInfo*>* regs) { LOG(INFO) << "================================================"; - GrowableArray<RegisterInfo*>::Iterator it(regs); - for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { + for (RegisterInfo* info : *regs) { LOG(INFO) << StringPrintf( "R[%d:%d:%c]: T:%d, U:%d, W:%d, p:%d, LV:%d, D:%d, SR:%d, DEF:%d", info->GetReg().GetReg(), info->GetReg().GetRegNum(), info->GetReg().IsFloat() ? 'f' : 'c', @@ -222,8 +217,7 @@ void Mir2Lir::ClobberSReg(int s_reg) { if (kIsDebugBuild && s_reg == live_sreg_) { live_sreg_ = INVALID_SREG; } - GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_); - for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) { + for (RegisterInfo* info : tempreg_info_) { if (info->SReg() == s_reg) { if (info->GetReg().NotExactlyEquals(info->Partner())) { // Dealing with a pair - clobber the other half. @@ -278,8 +272,7 @@ RegStorage Mir2Lir::AllocPreservedCoreReg(int s_reg) { * happens from the single or double pool. This entire section of code could stand * a good refactoring. 
*/ - GrowableArray<RegisterInfo*>::Iterator it(®_pool_->core_regs_); - for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { + for (RegisterInfo* info : reg_pool_->core_regs_) { if (!info->IsTemp() && !info->InUse()) { res = info->GetReg(); RecordCorePromotion(res, s_reg); @@ -311,8 +304,7 @@ RegStorage Mir2Lir::AllocPreservedFpReg(int s_reg) { */ DCHECK_NE(cu_->instruction_set, kThumb2); RegStorage res; - GrowableArray<RegisterInfo*>::Iterator it(®_pool_->sp_regs_); - for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { + for (RegisterInfo* info : reg_pool_->sp_regs_) { if (!info->IsTemp() && !info->InUse()) { res = info->GetReg(); RecordFpPromotion(res, s_reg); @@ -337,13 +329,14 @@ RegStorage Mir2Lir::AllocPreservedSingle(int s_reg) { } -RegStorage Mir2Lir::AllocTempBody(GrowableArray<RegisterInfo*> ®s, int* next_temp, bool required) { - int num_regs = regs.Size(); +RegStorage Mir2Lir::AllocTempBody(ArenaVector<RegisterInfo*>& regs, int* next_temp, bool required) { + int num_regs = regs.size(); int next = *next_temp; for (int i = 0; i< num_regs; i++) { - if (next >= num_regs) + if (next >= num_regs) { next = 0; - RegisterInfo* info = regs.Get(next); + } + RegisterInfo* info = regs[next]; // Try to allocate a register that doesn't hold a live value. if (info->IsTemp() && !info->InUse() && info->IsDead()) { // If it's wide, split it up. @@ -367,9 +360,10 @@ RegStorage Mir2Lir::AllocTempBody(GrowableArray<RegisterInfo*> ®s, int* next_ next = *next_temp; // No free non-live regs. Anything we can kill? for (int i = 0; i< num_regs; i++) { - if (next >= num_regs) + if (next >= num_regs) { next = 0; - RegisterInfo* info = regs.Get(next); + } + RegisterInfo* info = regs[next]; if (info->IsTemp() && !info->InUse()) { // Got one. Kill it. 
ClobberSReg(info->SReg()); @@ -401,7 +395,7 @@ RegStorage Mir2Lir::AllocTemp(bool required) { RegStorage Mir2Lir::AllocTempWide(bool required) { RegStorage res; - if (reg_pool_->core64_regs_.Size() != 0) { + if (reg_pool_->core64_regs_.size() != 0) { res = AllocTempBody(reg_pool_->core64_regs_, ®_pool_->next_core64_reg_, required); } else { RegStorage low_reg = AllocTemp(); @@ -458,10 +452,9 @@ RegStorage Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class, bool required) { return AllocTemp(required); } -RegStorage Mir2Lir::FindLiveReg(GrowableArray<RegisterInfo*> ®s, int s_reg) { +RegStorage Mir2Lir::FindLiveReg(ArenaVector<RegisterInfo*>& regs, int s_reg) { RegStorage res; - GrowableArray<RegisterInfo*>::Iterator it(®s); - for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { + for (RegisterInfo* info : regs) { if ((info->SReg() == s_reg) && info->IsLive()) { res = info->GetReg(); break; @@ -714,15 +707,13 @@ void Mir2Lir::ResetDefLocWide(RegLocation rl) { } void Mir2Lir::ResetDefTracking() { - GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_); - for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) { + for (RegisterInfo* info : tempreg_info_) { info->ResetDefBody(); } } void Mir2Lir::ClobberAllTemps() { - GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_); - for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) { + for (RegisterInfo* info : tempreg_info_) { ClobberBody(info); } } @@ -780,8 +771,7 @@ void Mir2Lir::FlushSpecificReg(RegisterInfo* info) { } void Mir2Lir::FlushAllRegs() { - GrowableArray<RegisterInfo*>::Iterator it(&tempreg_info_); - for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { + for (RegisterInfo* info : tempreg_info_) { if (info->IsDirty() && info->IsLive()) { FlushSpecificReg(info); } @@ -853,14 +843,16 @@ void Mir2Lir::MarkLive(RegLocation loc) { void Mir2Lir::MarkTemp(RegStorage reg) { DCHECK(!reg.IsPair()); RegisterInfo* info = GetRegInfo(reg); - tempreg_info_.Insert(info); + tempreg_info_.push_back(info); info->SetIsTemp(true); } void Mir2Lir::UnmarkTemp(RegStorage reg) { DCHECK(!reg.IsPair()); RegisterInfo* info = GetRegInfo(reg); - tempreg_info_.Delete(info); + auto pos = std::find(tempreg_info_.begin(), tempreg_info_.end(), info); + DCHECK(pos != tempreg_info_.end()); + tempreg_info_.erase(pos); info->SetIsTemp(false); } @@ -932,8 +924,7 @@ void Mir2Lir::MarkInUse(RegStorage reg) { } bool Mir2Lir::CheckCorePoolSanity() { - GrowableArray<RegisterInfo*>::Iterator it(&tempreg_info_); - for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { + for (RegisterInfo* info : tempreg_info_) { int my_sreg = info->SReg(); if (info->IsTemp() && info->IsLive() && info->IsWide() && my_sreg != INVALID_SREG) { RegStorage my_reg = info->GetReg(); @@ -1332,7 +1323,8 @@ void Mir2Lir::DoPromotion() { /* Returns sp-relative offset in bytes for a VReg */ int Mir2Lir::VRegOffset(int v_reg) { - return StackVisitor::GetVRegOffset(cu_->code_item, core_spill_mask_, + const DexFile::CodeItem* code_item = mir_graph_->GetCurrentDexCompilationUnit()->GetCodeItem(); + return StackVisitor::GetVRegOffset(code_item, core_spill_mask_, fp_spill_mask_, frame_size_, v_reg, cu_->instruction_set); } diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index a9a02523e2..ab1608be91 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -189,8 +189,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Mov32MR, 
kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32MR", "[!0r+!1d],!2r" }, { kX86Mov32AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" }, - { kX86Movnti32MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0F, 0, 0xC3, 0, 0, 0, 0, 0, false }, "Movnti32MR", "[!0r+!1d],!2r" }, - { kX86Movnti32AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0F, 0, 0xC3, 0, 0, 0, 0, 0, false }, "Movnti32AR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86Movnti32MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0, 0, 0x0F, 0xC3, 0, 0, 0, 0, false }, "Movnti32MR", "[!0r+!1d],!2r" }, + { kX86Movnti32AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0, 0, 0x0F, 0xC3, 0, 0, 0, 0, false }, "Movnti32AR", "[!0r+!1r<<!2d+!3d],!4r" }, { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32TR", "fs:[!0d],!1r" }, { kX86Mov32RR, kRegReg, IS_MOVE | IS_BINARY_OP | REG_DEF0_USE1, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RR", "!0r,!1r" }, { kX86Mov32RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RM", "!0r,[!1r+!2d]" }, @@ -206,8 +206,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Mov64MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { REX_W, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov64MR", "[!0r+!1d],!2r" }, { kX86Mov64AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { REX_W, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" }, - { kX86Movnti64MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0F, 0, 0xC3, 0, 0, 0, 0, 0, false }, "Movnti64MR", "[!0r+!1d],!2r" }, - { kX86Movnti64AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0F, 0, 0xC3, 0, 0, 0, 0, 0, false }, "Movnti64AR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86Movnti64MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { REX_W, 0, 0x0F, 0xC3, 0, 0, 0, 0, false }, "Movnti64MR", "[!0r+!1d],!2r" }, + { kX86Movnti64AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { REX_W, 0, 0x0F, 0xC3, 0, 0, 0, 0, false }, "Movnti64AR", "[!0r+!1r<<!2d+!3d],!4r" }, { kX86Mov64TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, REX_W, 0x89, 0, 0, 0, 0, 0, false }, "Mov64TR", "fs:[!0d],!1r" }, { kX86Mov64RR, kRegReg, IS_MOVE | IS_BINARY_OP | REG_DEF0_USE1, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RR", "!0r,!1r" }, { kX86Mov64RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RM", "!0r,[!1r+!2d]" }, @@ -917,22 +917,22 @@ void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry, if (r8_form) { // Do we need an empty REX prefix to normalize byte register addressing? 
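The Movnti rows above are an encoding fix: MOVNTI is the two-byte opcode 0F C3, so 0x0F and 0xC3 belong in the opcode fields (with REX.W in the prefix slot for the 64-bit forms), not in the prefix fields where they would be emitted ahead of any REX byte. A hand-rolled sketch of the required byte order for the simple [base],reg case, purely illustrative and not ART's assembler:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Hand-assembles MOVNTI [base64], src64 for the simple "no displacement,
    // base is not RSP/RBP/R12/R13" case, to show the byte order the encoding
    // table must produce: REX.W prefix, then the 0F C3 opcode bytes, then ModRM.
    std::vector<uint8_t> EmitMovnti64(int base_reg, int src_reg) {
      std::vector<uint8_t> bytes;
      uint8_t rex = 0x48;                       // REX.W: 64-bit store.
      if (src_reg >= 8)  rex |= 0x04;           // REX.R extends the reg field.
      if (base_reg >= 8) rex |= 0x01;           // REX.B extends the r/m field.
      bytes.push_back(rex);
      bytes.push_back(0x0F);                    // Two-byte opcode escape...
      bytes.push_back(0xC3);                    // ...MOVNTI.
      bytes.push_back(static_cast<uint8_t>(((src_reg & 7) << 3) | (base_reg & 7)));  // mod=00.
      return bytes;
    }

    int main() {
      for (uint8_t b : EmitMovnti64(/*base=rdi*/ 7, /*src=rax*/ 0)) {
        std::printf("%02x ", b);                // Prints: 48 0f c3 07
      }
      std::printf("\n");
      return 0;
    }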
if (RegStorage::RegNum(raw_reg_r) >= 4 && !IsByteSecondOperand(entry)) { - rex |= 0x40; // REX.0000 + rex |= REX; // REX.0000 } else if (modrm_is_reg_reg && RegStorage::RegNum(raw_reg_b) >= 4) { - rex |= 0x40; // REX.0000 + rex |= REX; // REX.0000 } } if (w) { - rex |= 0x48; // REX.W000 + rex |= REX_W; // REX.W000 } if (r) { - rex |= 0x44; // REX.0R00 + rex |= REX_R; // REX.0R00 } if (x) { - rex |= 0x42; // REX.00X0 + rex |= REX_X; // REX.00X0 } if (b) { - rex |= 0x41; // REX.000B + rex |= REX_B; // REX.000B } if (entry->skeleton.prefix1 != 0) { if (cu_->target64 && entry->skeleton.prefix1 == THREAD_PREFIX) { diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index 482c430e88..441ec9e009 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -19,6 +19,8 @@ #include "codegen_x86.h" #include "dex/quick/mir_to_lir-inl.h" #include "gc/accounting/card_table.h" +#include "mirror/art_method.h" +#include "mirror/object_array-inl.h" #include "x86_lir.h" namespace art { @@ -73,7 +75,7 @@ void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocat int size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); - switch_tables_.Insert(tab_rec); + switch_tables_.push_back(tab_rec); // Get the switch value rl_src = LoadValue(rl_src, kCoreReg); @@ -145,7 +147,7 @@ void X86Mir2Lir::GenFillArrayData(MIR* mir, DexOffset table_offset, RegLocation uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16); tab_rec->size = (size * width) + 8; - fill_array_data_.Insert(tab_rec); + fill_array_data_.push_back(tab_rec); // Making a call - use explicit registers FlushAllRegs(); /* Everything to home location */ @@ -330,4 +332,58 @@ void X86Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) { MarkPossibleNullPointerException(opt_flags); } +/* + * Bit of a hack here - in the absence of a real scheduling pass, + * emit the next instruction in static & direct invoke sequences. + */ +static int X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const MethodReference& target_method, + uint32_t unused, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type) { + Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); + if (direct_method != 0) { + switch (state) { + case 0: // Get the current Method* [sets kArg0] + if (direct_method != static_cast<uintptr_t>(-1)) { + cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method); + } else { + cg->LoadMethodAddress(target_method, type, kArg0); + } + break; + default: + return -1; + } + } else { + RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); + switch (state) { + case 0: // Get the current Method* [sets kArg0] + // TUNING: we can save a reg copy if Method* has been promoted. 
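The EmitPrefix() hunk above swaps the 0x40/0x48/0x44/0x42/0x41 literals for the named REX, REX_W, REX_R, REX_X and REX_B constants; since each named value carries the 0100 marker bits, OR-ing them together still yields a well-formed prefix. A small sketch with locally defined constants whose values are inferred from the hunk's comments, not copied from x86_lir.h:

    #include <cstdint>
    #include <cstdio>

    // Each named constant carries the 0100 marker bits, so any combination
    // OR-ed together is still a valid REX prefix byte.
    constexpr uint8_t kRex  = 0x40;  // REX.0000
    constexpr uint8_t kRexW = 0x48;  // REX.W000: 64-bit operand size.
    constexpr uint8_t kRexR = 0x44;  // REX.0R00: extends the ModRM reg field.
    constexpr uint8_t kRexX = 0x42;  // REX.00X0: extends the SIB index field.
    constexpr uint8_t kRexB = 0x41;  // REX.000B: extends the ModRM r/m or SIB base field.

    uint8_t BuildRex(bool force_empty, bool w, bool r, bool x, bool b) {
      uint8_t rex = 0;
      if (force_empty) rex |= kRex;  // Needed to address spl/bpl/sil/dil as byte registers.
      if (w) rex |= kRexW;
      if (r) rex |= kRexR;
      if (x) rex |= kRexX;
      if (b) rex |= kRexB;
      return rex;
    }

    int main() {
      std::printf("%#04x\n", BuildRex(false, true, false, false, true));  // 0x49: REX.W + REX.B
      return 0;
    }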
+ cg->LoadCurrMethodDirect(arg0_ref); + break; + case 1: // Get method->dex_cache_resolved_methods_ + cg->LoadRefDisp(arg0_ref, + mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), + arg0_ref, + kNotVolatile); + break; + case 2: // Grab target method* + CHECK_EQ(cu->dex_file, target_method.dex_file); + cg->LoadRefDisp(arg0_ref, + mirror::ObjectArray<mirror::Object>::OffsetOfElement( + target_method.dex_method_index).Int32Value(), + arg0_ref, + kNotVolatile); + break; + default: + return -1; + } + } + return state + 1; +} + +NextCallInsn X86Mir2Lir::GetNextSDCallInsn() { + return X86NextSDCallInsn; +} + } // namespace art diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 6020e70f32..8edfc017d1 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -341,6 +341,7 @@ class X86Mir2Lir : public Mir2Lir { void FlushIns(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE; + NextCallInsn GetNextSDCallInsn() OVERRIDE; int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, const MethodReference& target_method, @@ -361,7 +362,14 @@ class X86Mir2Lir : public Mir2Lir { * @param type How the method will be invoked. * @returns Call instruction */ - virtual LIR * CallWithLinkerFixup(const MethodReference& target_method, InvokeType type); + LIR* CallWithLinkerFixup(const MethodReference& target_method, InvokeType type); + + /* + * @brief Generate the actual call insn based on the method info. + * @param method_info the lowering info for the method call. + * @returns Call instruction + */ + LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE; /* * @brief Handle x86 specific literals @@ -947,13 +955,13 @@ class X86Mir2Lir : public Mir2Lir { LIR* setup_method_address_[2]; // Instructions needing patching with Method* values. - GrowableArray<LIR*> method_address_insns_; + ArenaVector<LIR*> method_address_insns_; // Instructions needing patching with Class Type* values. - GrowableArray<LIR*> class_type_address_insns_; + ArenaVector<LIR*> class_type_address_insns_; // Instructions needing patching with PC relative code addresses. - GrowableArray<LIR*> call_method_insns_; + ArenaVector<LIR*> call_method_insns_; // Prologue decrement of stack pointer. LIR* stack_decrement_; diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 2ec37cd80e..4357657680 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -3031,7 +3031,6 @@ void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, LoadValueDirectFixed(rl_rhs, t_reg); if (is_two_addr) { // Can we do this directly into memory? 
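X86NextSDCallInsn() above is a small state machine: the shared invoke code calls it repeatedly with an increasing state, it emits one load per call, and it returns -1 once the sequence is finished, which lets argument moves be interleaved between the steps. A toy driver illustrating that contract, with invented step names:

    #include <cstdio>

    // A NextCallInsn-style callback: emit whatever belongs to 'state' and return
    // the next state, or -1 once the invoke sequence is complete.
    typedef int (*NextStepFn)(int state);

    static int DirectInvokeSteps(int state) {
      switch (state) {
        case 0: std::printf("load current Method* into arg0\n"); break;
        case 1: std::printf("load arg0->dex_cache_resolved_methods_\n"); break;
        case 2: std::printf("load the resolved target method from the dex cache array\n"); break;
        default: return -1;  // Nothing left to emit.
      }
      return state + 1;
    }

    // The argument-setup loops keep asking for "the next call instruction" so the
    // per-step loads can be interleaved with argument moves.
    static void GenerateInvoke(NextStepFn next_call_insn) {
      int state = 0;
      while (state != -1) {
        state = next_call_insn(state);
      }
    }

    int main() {
      GenerateInvoke(DirectInvokeSteps);
      return 0;
    }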
- rl_rhs = LoadValue(rl_rhs, kCoreReg); rl_result = UpdateLocTyped(rl_dest, kCoreReg); if (rl_result.location != kLocPhysReg) { // Okay, we can do this into memory diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index de11996b26..760358efb6 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -24,6 +24,7 @@ #include "dex/quick/mir_to_lir-inl.h" #include "dex/reg_storage_eq.h" #include "mirror/array.h" +#include "mirror/art_method.h" #include "mirror/string.h" #include "oat.h" #include "x86_lir.h" @@ -620,13 +621,15 @@ bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { void X86Mir2Lir::CompilerInitializeRegAlloc() { if (cu_->target64) { - reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, core_regs_64q, sp_regs_64, - dp_regs_64, reserved_regs_64, reserved_regs_64q, - core_temps_64, core_temps_64q, sp_temps_64, dp_temps_64); + reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs_64, core_regs_64q, sp_regs_64, + dp_regs_64, reserved_regs_64, reserved_regs_64q, + core_temps_64, core_temps_64q, + sp_temps_64, dp_temps_64)); } else { - reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool, sp_regs_32, - dp_regs_32, reserved_regs_32, empty_pool, - core_temps_32, empty_pool, sp_temps_32, dp_temps_32); + reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool, sp_regs_32, + dp_regs_32, reserved_regs_32, empty_pool, + core_temps_32, empty_pool, + sp_temps_32, dp_temps_32)); } // Target-specific adjustments. @@ -635,7 +638,7 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() { const ArrayRef<const RegStorage> *xp_regs = cu_->target64 ? &xp_regs_64 : &xp_regs_32; for (RegStorage reg : *xp_regs) { RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg)); - reginfo_map_.Put(reg.GetReg(), info); + reginfo_map_[reg.GetReg()] = info; } const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32; for (RegStorage reg : *xp_temps) { @@ -645,8 +648,7 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() { // Alias single precision xmm to double xmms. // TODO: as needed, add larger vector sizes - alias all to the largest. - GrowableArray<RegisterInfo*>::Iterator it(®_pool_->sp_regs_); - for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { + for (RegisterInfo* info : reg_pool_->sp_regs_) { int sp_reg_num = info->GetReg().GetRegNum(); RegStorage xp_reg = RegStorage::Solo128(sp_reg_num); RegisterInfo* xp_reg_info = GetRegInfo(xp_reg); @@ -666,8 +668,7 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() { if (cu_->target64) { // Alias 32bit W registers to corresponding 64bit X registers. 
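CompilerInitializeRegAlloc() now resets a std::unique_ptr<RegisterPool> with an arena-allocated object; this only works because RegisterPool gained a no-op operator delete earlier in this change, so the unique_ptr's `delete` runs the destructor while the memory itself stays in the arena. A compilable sketch of that pairing, with a toy Arena type standing in for ART's ArenaAllocator:

    #include <cstddef>
    #include <cstdio>
    #include <memory>
    #include <vector>

    // Toy arena standing in for ART's ArenaAllocator.
    class Arena {
     public:
      explicit Arena(std::size_t capacity) : storage_(capacity), used_(0) {}
      void* Alloc(std::size_t bytes) {
        void* result = storage_.data() + used_;
        used_ += (bytes + 15) & ~static_cast<std::size_t>(15);
        return result;
      }
     private:
      std::vector<char> storage_;
      std::size_t used_;
    };

    class RegisterPool {
     public:
      // Placement-style allocation from the arena...
      static void* operator new(std::size_t size, Arena* arena) { return arena->Alloc(size); }
      // ...and a no-op operator delete, so std::unique_ptr's `delete ptr` runs the
      // destructor while the memory stays owned by the arena.
      static void operator delete(void* ptr) { (void)ptr; }
      ~RegisterPool() { std::printf("pool destroyed\n"); }
    };

    int main() {
      Arena arena(1024);
      std::unique_ptr<RegisterPool> pool(new (&arena) RegisterPool());
      pool.reset(new (&arena) RegisterPool());  // Old pool is destroyed; nothing is freed.
      return 0;
    }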
- GrowableArray<RegisterInfo*>::Iterator w_it(®_pool_->core_regs_); - for (RegisterInfo* info = w_it.Next(); info != nullptr; info = w_it.Next()) { + for (RegisterInfo* info : reg_pool_->core_regs_) { int x_reg_num = info->GetReg().GetRegNum(); RegStorage x_reg = RegStorage::Solo64(x_reg_num); RegisterInfo* x_reg_info = GetRegInfo(x_reg); @@ -785,11 +786,14 @@ RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatil X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) : Mir2Lir(cu, mir_graph, arena), base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false), - method_address_insns_(arena, 100, kGrowableArrayMisc), - class_type_address_insns_(arena, 100, kGrowableArrayMisc), - call_method_insns_(arena, 100, kGrowableArrayMisc), + method_address_insns_(arena->Adapter()), + class_type_address_insns_(arena->Adapter()), + call_method_insns_(arena->Adapter()), stack_decrement_(nullptr), stack_increment_(nullptr), const_vectors_(nullptr) { + method_address_insns_.reserve(100); + class_type_address_insns_.reserve(100); + call_method_insns_.reserve(100); store_method_addr_used_ = false; if (kIsDebugBuild) { for (int i = 0; i < kX86Last; i++) { @@ -977,7 +981,7 @@ void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeT static_cast<int>(target_method_id_ptr), target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type); AppendLIR(move); - method_address_insns_.Insert(move); + method_address_insns_.push_back(move); } void X86Mir2Lir::LoadClassType(const DexFile& dex_file, uint32_t type_idx, @@ -996,28 +1000,68 @@ void X86Mir2Lir::LoadClassType(const DexFile& dex_file, uint32_t type_idx, static_cast<int>(ptr), type_idx, WrapPointer(const_cast<DexFile*>(&dex_file))); AppendLIR(move); - class_type_address_insns_.Insert(move); + class_type_address_insns_.push_back(move); } -LIR *X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) { +LIR* X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) { /* * For x86, just generate a 32 bit call relative instruction, that will be filled - * in at 'link time'. For now, put a unique value based on target to ensure that - * code deduplication works. + * in at 'link time'. */ int target_method_idx = target_method.dex_method_index; const DexFile* target_dex_file = target_method.dex_file; - const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx); - uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id); // Generate the call instruction with the unique pointer and save index, dex_file, and type. - LIR *call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(target_method_id_ptr), + // NOTE: Method deduplication takes linker patches into account, so we can just pass 0 + // as a placeholder for the offset. 
+ LIR* call = RawLIR(current_dalvik_offset_, kX86CallI, 0, target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type); AppendLIR(call); - call_method_insns_.Insert(call); + call_method_insns_.push_back(call); return call; } +static LIR* GenInvokeNoInlineCall(Mir2Lir* mir_to_lir, InvokeType type) { + QuickEntrypointEnum trampoline; + switch (type) { + case kInterface: + trampoline = kQuickInvokeInterfaceTrampolineWithAccessCheck; + break; + case kDirect: + trampoline = kQuickInvokeDirectTrampolineWithAccessCheck; + break; + case kStatic: + trampoline = kQuickInvokeStaticTrampolineWithAccessCheck; + break; + case kSuper: + trampoline = kQuickInvokeSuperTrampolineWithAccessCheck; + break; + case kVirtual: + trampoline = kQuickInvokeVirtualTrampolineWithAccessCheck; + break; + default: + LOG(FATAL) << "Unexpected invoke type"; + trampoline = kQuickInvokeInterfaceTrampolineWithAccessCheck; + } + return mir_to_lir->InvokeTrampoline(kOpBlx, RegStorage::InvalidReg(), trampoline); +} + +LIR* X86Mir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info) { + LIR* call_insn; + if (method_info.FastPath()) { + if (method_info.DirectCode() == static_cast<uintptr_t>(-1)) { + // We can have the linker fixup a call relative. + call_insn = CallWithLinkerFixup(method_info.GetTargetMethod(), method_info.GetSharpType()); + } else { + call_insn = OpMem(kOpBlx, TargetReg(kArg0, kRef), + mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()); + } + } else { + call_insn = GenInvokeNoInlineCall(this, method_info.GetSharpType()); + } + return call_insn; +} + void X86Mir2Lir::InstallLiteralPools() { // These are handled differently for x86. DCHECK(code_literal_list_ == nullptr); @@ -1045,8 +1089,7 @@ void X86Mir2Lir::InstallLiteralPools() { } // Handle the fixups for methods. - for (uint32_t i = 0; i < method_address_insns_.Size(); i++) { - LIR* p = method_address_insns_.Get(i); + for (LIR* p : method_address_insns_) { DCHECK_EQ(p->opcode, kX86Mov32RI); uint32_t target_method_idx = p->operands[2]; const DexFile* target_dex_file = @@ -1054,32 +1097,27 @@ void X86Mir2Lir::InstallLiteralPools() { // The offset to patch is the last 4 bytes of the instruction. int patch_offset = p->offset + p->flags.size - 4; - cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx, - cu_->method_idx, cu_->invoke_type, - target_method_idx, target_dex_file, - static_cast<InvokeType>(p->operands[4]), - patch_offset); + patches_.push_back(LinkerPatch::MethodPatch(patch_offset, + target_dex_file, target_method_idx)); } // Handle the fixups for class types. - for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) { - LIR* p = class_type_address_insns_.Get(i); + for (LIR* p : class_type_address_insns_) { DCHECK_EQ(p->opcode, kX86Mov32RI); const DexFile* class_dex_file = reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3])); - uint32_t target_method_idx = p->operands[2]; + uint32_t target_type_idx = p->operands[2]; // The offset to patch is the last 4 bytes of the instruction. int patch_offset = p->offset + p->flags.size - 4; - cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx, - cu_->method_idx, target_method_idx, class_dex_file, - patch_offset); + patches_.push_back(LinkerPatch::TypePatch(patch_offset, + class_dex_file, target_type_idx)); } // And now the PC-relative calls to methods. 
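CallWithLinkerFixup() now emits the kX86CallI with a plain 0 operand, and InstallLiteralPools() turns each remembered instruction into a self-describing LinkerPatch (patch offset plus target dex file and index) stored with the compiled method instead of calling back into the CompilerDriver. A toy end-to-end version of that split for a pc-relative call, with invented names and a little-endian host assumed:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Minimal stand-in for a LinkerPatch: where to patch and what it refers to.
    struct Patch {
      std::size_t literal_offset;  // Offset of the 4-byte slot inside the method's code.
      uint32_t target_method_idx;  // Index of the callee in the target dex file.
    };

    // Emits `call rel32` with a zero placeholder and records the patch site,
    // mirroring CallWithLinkerFixup() followed by patches_.push_back(...).
    void EmitCallWithPatch(std::vector<uint8_t>* code, uint32_t target_method_idx,
                           std::vector<Patch>* patches) {
      code->push_back(0xE8);                    // call rel32 opcode.
      std::size_t patch_offset = code->size();  // Last 4 bytes of the instruction.
      code->insert(code->end(), 4, 0x00);       // Placeholder displacement.
      patches->push_back(Patch{patch_offset, target_method_idx});
    }

    // At link time, once each callee's code offset is known, resolve every patch;
    // rel32 is relative to the end of the call instruction.
    void ApplyPatches(std::vector<uint8_t>* code, const std::vector<Patch>& patches,
                      const std::vector<std::size_t>& method_code_offsets) {
      for (const Patch& patch : patches) {
        std::size_t target = method_code_offsets[patch.target_method_idx];
        int32_t rel = static_cast<int32_t>(target) - static_cast<int32_t>(patch.literal_offset + 4);
        std::memcpy(code->data() + patch.literal_offset, &rel, sizeof(rel));  // Little-endian host.
      }
    }

    int main() {
      std::vector<uint8_t> code;
      std::vector<Patch> patches;
      EmitCallWithPatch(&code, /*target_method_idx=*/0, &patches);
      std::vector<std::size_t> method_code_offsets = {64};  // Method 0 ends up at offset 64.
      ApplyPatches(&code, patches, method_code_offsets);
      return code[1] == 59 ? 0 : 1;  // 64 - 5 = 59: displacement from the end of the call.
    }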
- for (uint32_t i = 0; i < call_method_insns_.Size(); i++) { - LIR* p = call_method_insns_.Get(i); + patches_.reserve(call_method_insns_.size()); + for (LIR* p : call_method_insns_) { DCHECK_EQ(p->opcode, kX86CallI); uint32_t target_method_idx = p->operands[1]; const DexFile* target_dex_file = @@ -1087,11 +1125,8 @@ void X86Mir2Lir::InstallLiteralPools() { // The offset to patch is the last 4 bytes of the instruction. int patch_offset = p->offset + p->flags.size - 4; - cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx, - cu_->method_idx, cu_->invoke_type, - target_method_idx, target_dex_file, - static_cast<InvokeType>(p->operands[3]), - patch_offset, -4 /* offset */); + patches_.push_back(LinkerPatch::RelativeCodePatch(patch_offset, + target_dex_file, target_method_idx)); } // And do the normal processing. @@ -1577,11 +1612,11 @@ void X86Mir2Lir::ReserveVectorRegisters(MIR* mir) { for (RegisterInfo *info = xp_reg_info->GetAliasChain(); info != nullptr; info = info->GetAliasChain()) { - if (info->GetReg().IsSingle()) { - reg_pool_->sp_regs_.Delete(info); - } else { - reg_pool_->dp_regs_.Delete(info); - } + ArenaVector<RegisterInfo*>* regs = + info->GetReg().IsSingle() ? ®_pool_->sp_regs_ : ®_pool_->dp_regs_; + auto it = std::find(regs->begin(), regs->end(), info); + DCHECK(it != regs->end()); + regs->erase(it); } } } @@ -1595,9 +1630,9 @@ void X86Mir2Lir::ReturnVectorRegisters(MIR* mir) { info != nullptr; info = info->GetAliasChain()) { if (info->GetReg().IsSingle()) { - reg_pool_->sp_regs_.Insert(info); + reg_pool_->sp_regs_.push_back(info); } else { - reg_pool_->dp_regs_.Insert(info); + reg_pool_->dp_regs_.push_back(info); } } } diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc index a2b9bb7760..3cc573b9c9 100644 --- a/compiler/dex/ssa_transformation.cc +++ b/compiler/dex/ssa_transformation.cc @@ -45,12 +45,7 @@ BasicBlock* MIRGraph::NextUnvisitedSuccessor(BasicBlock* bb) { res = NeedsVisit(GetBasicBlock(bb->taken)); if (res == NULL) { if (bb->successor_block_list_type != kNotUsed) { - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bb->successor_blocks); - while (true) { - SuccessorBlockInfo *sbi = iterator.Next(); - if (sbi == NULL) { - break; - } + for (SuccessorBlockInfo* sbi : bb->successor_blocks) { res = NeedsVisit(GetBasicBlock(sbi->block)); if (res != NULL) { break; @@ -66,7 +61,7 @@ void MIRGraph::MarkPreOrder(BasicBlock* block) { block->visited = true; /* Enqueue the pre_order block id */ if (block->id != NullBasicBlockId) { - dfs_order_->Insert(block->id); + dfs_order_.push_back(block->id); } } @@ -83,9 +78,9 @@ void MIRGraph::RecordDFSOrders(BasicBlock* block) { succ.push_back(next_successor); continue; } - curr->dfs_id = dfs_post_order_->Size(); + curr->dfs_id = dfs_post_order_.size(); if (curr->id != NullBasicBlockId) { - dfs_post_order_->Insert(curr->id); + dfs_post_order_.push_back(curr->id); } succ.pop_back(); } @@ -93,23 +88,11 @@ void MIRGraph::RecordDFSOrders(BasicBlock* block) { /* Sort the blocks by the Depth-First-Search */ void MIRGraph::ComputeDFSOrders() { - /* Initialize or reset the DFS pre_order list */ - if (dfs_order_ == NULL) { - dfs_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, GetNumBlocks(), - kGrowableArrayDfsOrder); - } else { - /* Just reset the used length on the counter */ - dfs_order_->Reset(); - } - - /* Initialize or reset the DFS post_order list */ - if (dfs_post_order_ == NULL) { - dfs_post_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, GetNumBlocks(), - 
kGrowableArrayDfsPostOrder); - } else { - /* Just reset the used length on the counter */ - dfs_post_order_->Reset(); - } + /* Clear the DFS pre-order and post-order lists. */ + dfs_order_.clear(); + dfs_order_.reserve(GetNumBlocks()); + dfs_post_order_.clear(); + dfs_post_order_.reserve(GetNumBlocks()); // Reset visited flags from all nodes ClearAllVisitedFlags(); @@ -117,7 +100,7 @@ void MIRGraph::ComputeDFSOrders() { // Record dfs orders RecordDFSOrders(GetEntryBlock()); - num_reachable_blocks_ = dfs_order_->Size(); + num_reachable_blocks_ = dfs_order_.size(); if (num_reachable_blocks_ != num_blocks_) { // Hide all unreachable blocks. @@ -181,14 +164,10 @@ void MIRGraph::ComputeDefBlockMatrix() { } void MIRGraph::ComputeDomPostOrderTraversal(BasicBlock* bb) { - if (dom_post_order_traversal_ == NULL || max_num_reachable_blocks_ < num_reachable_blocks_) { - // First time or too small - create the array. - dom_post_order_traversal_ = - new (arena_) GrowableArray<BasicBlockId>(arena_, num_reachable_blocks_, - kGrowableArrayDomPostOrderTraversal); - } else { - dom_post_order_traversal_->Reset(); - } + // Clear the dominator post-order list. + dom_post_order_traversal_.clear(); + dom_post_order_traversal_.reserve(num_reachable_blocks_); + ClearAllVisitedFlags(); DCHECK(temp_scoped_alloc_.get() != nullptr); ScopedArenaVector<std::pair<BasicBlock*, ArenaBitVector::IndexIterator>> work_stack( @@ -211,7 +190,7 @@ void MIRGraph::ComputeDomPostOrderTraversal(BasicBlock* bb) { } else { // no successor/next if (curr_bb->id != NullBasicBlockId) { - dom_post_order_traversal_->Insert(curr_bb->id); + dom_post_order_traversal_.push_back(curr_bb->id); } work_stack.pop_back(); @@ -247,15 +226,10 @@ bool MIRGraph::ComputeDominanceFrontier(BasicBlock* bb) { CheckForDominanceFrontier(bb, GetBasicBlock(bb->fall_through)); } if (bb->successor_block_list_type != kNotUsed) { - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bb->successor_blocks); - while (true) { - SuccessorBlockInfo *successor_block_info = iterator.Next(); - if (successor_block_info == NULL) { - break; - } - BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block); - CheckForDominanceFrontier(bb, succ_bb); - } + for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) { + BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block); + CheckForDominanceFrontier(bb, succ_bb); + } } /* Calculate DF_up */ @@ -319,13 +293,14 @@ bool MIRGraph::ComputeblockIDom(BasicBlock* bb) { } /* Iterate through the predecessors */ - GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors); + auto it = bb->predecessors.begin(), end = bb->predecessors.end(); /* Find the first processed predecessor */ int idom = -1; - while (true) { - BasicBlock* pred_bb = GetBasicBlock(iter.Next()); - CHECK(pred_bb != NULL); + for ( ; ; ++it) { + CHECK(it != end); + BasicBlock* pred_bb = GetBasicBlock(*it); + DCHECK(pred_bb != nullptr); if (i_dom_list_[pred_bb->dfs_id] != NOTVISITED) { idom = pred_bb->dfs_id; break; @@ -333,11 +308,9 @@ bool MIRGraph::ComputeblockIDom(BasicBlock* bb) { } /* Scan the rest of the predecessors */ - while (true) { - BasicBlock* pred_bb = GetBasicBlock(iter.Next()); - if (!pred_bb) { - break; - } + for ( ; it != end; ++it) { + BasicBlock* pred_bb = GetBasicBlock(*it); + DCHECK(pred_bb != nullptr); if (i_dom_list_[pred_bb->dfs_id] == NOTVISITED) { continue; } else { @@ -370,7 +343,7 @@ bool MIRGraph::SetDominators(BasicBlock* bb) { if (bb != GetEntryBlock()) { int idom_dfs_idx = i_dom_list_[bb->dfs_id]; 
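ComputeblockIDom() above first finds one already-processed predecessor and then folds in each remaining processed predecessor with a dominator-tree intersection over DFS post-order ids. The intersection helper itself is not part of this hunk; the sketch below is the standard Cooper-Harvey-Kennedy formulation it corresponds to, where the entry block carries the highest post-order id:

    #include <cstdio>
    #include <vector>

    // Classic dominator-tree intersection over DFS post-order ids: walking toward
    // the entry block means following idom[] from the smaller id upward until the
    // two fingers meet.
    int Intersect(const std::vector<int>& idom, int b1, int b2) {
      while (b1 != b2) {
        while (b1 < b2) b1 = idom[b1];
        while (b2 < b1) b2 = idom[b2];
      }
      return b1;
    }

    int main() {
      // Diamond CFG: entry -> A, entry -> B, A -> join, B -> join.
      // Post-order ids: join = 0, A = 1, B = 2, entry = 3 (entry is highest).
      // Each block's immediate dominator is the entry block.
      std::vector<int> idom = {3, 3, 3, 3};
      std::printf("%d\n", Intersect(idom, 1, 2));  // Prints 3: only the entry dominates both.
      return 0;
    }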
DCHECK_NE(idom_dfs_idx, NOTVISITED); - int i_dom_idx = dfs_post_order_->Get(idom_dfs_idx); + int i_dom_idx = dfs_post_order_[idom_dfs_idx]; BasicBlock* i_dom = GetBasicBlock(i_dom_idx); bb->i_dom = i_dom->id; /* Add bb to the i_dominated set of the immediate dominator block */ @@ -474,12 +447,7 @@ bool MIRGraph::ComputeBlockLiveIns(BasicBlock* bb) { ComputeSuccLineIn(temp_dalvik_register_v, bb_fall_through->data_flow_info->live_in_v, bb->data_flow_info->def_v); if (bb->successor_block_list_type != kNotUsed) { - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bb->successor_blocks); - while (true) { - SuccessorBlockInfo *successor_block_info = iterator.Next(); - if (successor_block_info == NULL) { - break; - } + for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) { BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block); if (succ_bb->data_flow_info) { ComputeSuccLineIn(temp_dalvik_register_v, succ_bb->data_flow_info->live_in_v, @@ -556,8 +524,7 @@ bool MIRGraph::InsertPhiNodeOperands(BasicBlock* bb) { int v_reg = SRegToVReg(ssa_reg); /* Iterate through the predecessors */ - GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors); - size_t num_uses = bb->predecessors->Size(); + size_t num_uses = bb->predecessors.size(); AllocateSSAUseData(mir, num_uses); int* uses = mir->ssa_rep->uses; BasicBlockId* incoming = @@ -565,14 +532,12 @@ bool MIRGraph::InsertPhiNodeOperands(BasicBlock* bb) { kArenaAllocDFInfo)); mir->meta.phi_incoming = incoming; int idx = 0; - while (true) { - BasicBlock* pred_bb = GetBasicBlock(iter.Next()); - if (!pred_bb) { - break; - } + for (BasicBlockId pred_id : bb->predecessors) { + BasicBlock* pred_bb = GetBasicBlock(pred_id); + DCHECK(pred_bb != nullptr); int ssa_reg = pred_bb->data_flow_info->vreg_to_ssa_map_exit[v_reg]; uses[idx] = ssa_reg; - incoming[idx] = pred_bb->id; + incoming[idx] = pred_id; idx++; } } @@ -607,12 +572,7 @@ void MIRGraph::DoDFSPreOrderSSARename(BasicBlock* block) { memcpy(vreg_to_ssa_map_, saved_ssa_map, map_size); } if (block->successor_block_list_type != kNotUsed) { - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(block->successor_blocks); - while (true) { - SuccessorBlockInfo *successor_block_info = iterator.Next(); - if (successor_block_info == NULL) { - break; - } + for (SuccessorBlockInfo* successor_block_info : block->successor_blocks) { BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block); DoDFSPreOrderSSARename(succ_bb); /* Restore SSA map snapshot */ diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 990c1c87cf..cdb816d560 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -342,6 +342,8 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options, freezing_constructor_lock_("freezing constructor lock"), compiled_classes_lock_("compiled classes lock"), compiled_methods_lock_("compiled method lock"), + compiled_methods_(), + non_relative_linker_patch_count_(0u), image_(image), image_classes_(image_classes), thread_count_(thread_count), @@ -426,18 +428,6 @@ CompilerDriver::~CompilerDriver() { MutexLock mu(self, compiled_methods_lock_); STLDeleteValues(&compiled_methods_); } - { - MutexLock mu(self, compiled_methods_lock_); - STLDeleteElements(&code_to_patch_); - } - { - MutexLock mu(self, compiled_methods_lock_); - STLDeleteElements(&methods_to_patch_); - } - { - MutexLock mu(self, compiled_methods_lock_); - STLDeleteElements(&classes_to_patch_); - } CHECK_PTHREAD_CALL(pthread_key_delete, 
(tls_key_), "delete tls key"); compiler_->UnInit(); } @@ -1320,77 +1310,6 @@ bool CompilerDriver::IsSafeCast(const DexCompilationUnit* mUnit, uint32_t dex_pc return result; } -void CompilerDriver::AddCodePatch(const DexFile* dex_file, - uint16_t referrer_class_def_idx, - uint32_t referrer_method_idx, - InvokeType referrer_invoke_type, - uint32_t target_method_idx, - const DexFile* target_dex_file, - InvokeType target_invoke_type, - size_t literal_offset) { - MutexLock mu(Thread::Current(), compiled_methods_lock_); - code_to_patch_.push_back(new CallPatchInformation(dex_file, - referrer_class_def_idx, - referrer_method_idx, - referrer_invoke_type, - target_method_idx, - target_dex_file, - target_invoke_type, - literal_offset)); -} -void CompilerDriver::AddRelativeCodePatch(const DexFile* dex_file, - uint16_t referrer_class_def_idx, - uint32_t referrer_method_idx, - InvokeType referrer_invoke_type, - uint32_t target_method_idx, - const DexFile* target_dex_file, - InvokeType target_invoke_type, - size_t literal_offset, - int32_t pc_relative_offset) { - MutexLock mu(Thread::Current(), compiled_methods_lock_); - code_to_patch_.push_back(new RelativeCallPatchInformation(dex_file, - referrer_class_def_idx, - referrer_method_idx, - referrer_invoke_type, - target_method_idx, - target_dex_file, - target_invoke_type, - literal_offset, - pc_relative_offset)); -} -void CompilerDriver::AddMethodPatch(const DexFile* dex_file, - uint16_t referrer_class_def_idx, - uint32_t referrer_method_idx, - InvokeType referrer_invoke_type, - uint32_t target_method_idx, - const DexFile* target_dex_file, - InvokeType target_invoke_type, - size_t literal_offset) { - MutexLock mu(Thread::Current(), compiled_methods_lock_); - methods_to_patch_.push_back(new CallPatchInformation(dex_file, - referrer_class_def_idx, - referrer_method_idx, - referrer_invoke_type, - target_method_idx, - target_dex_file, - target_invoke_type, - literal_offset)); -} -void CompilerDriver::AddClassPatch(const DexFile* dex_file, - uint16_t referrer_class_def_idx, - uint32_t referrer_method_idx, - uint32_t target_type_idx, - const DexFile* target_type_dex_file, - size_t literal_offset) { - MutexLock mu(Thread::Current(), compiled_methods_lock_); - classes_to_patch_.push_back(new TypePatchInformation(dex_file, - referrer_class_def_idx, - referrer_method_idx, - target_type_idx, - target_type_dex_file, - literal_offset)); -} - class ParallelCompilationManager { public: typedef void Callback(const ParallelCompilationManager* manager, size_t index); @@ -2076,11 +1995,19 @@ void CompilerDriver::CompileMethod(const DexFile::CodeItem* code_item, uint32_t Thread* self = Thread::Current(); if (compiled_method != nullptr) { + // Count non-relative linker patches. 
+ size_t non_relative_linker_patch_count = 0u; + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() != kLinkerPatchCallRelative) { + ++non_relative_linker_patch_count; + } + } MethodReference ref(&dex_file, method_idx); DCHECK(GetCompiledMethod(ref) == nullptr) << PrettyMethod(method_idx, dex_file); { MutexLock mu(self, compiled_methods_lock_); compiled_methods_.Put(ref, compiled_method); + non_relative_linker_patch_count_ += non_relative_linker_patch_count; } DCHECK(GetCompiledMethod(ref) != nullptr) << PrettyMethod(method_idx, dex_file); } @@ -2138,6 +2065,11 @@ CompiledMethod* CompilerDriver::GetCompiledMethod(MethodReference ref) const { return it->second; } +size_t CompilerDriver::GetNonRelativeLinkerPatchCount() const { + MutexLock mu(Thread::Current(), compiled_methods_lock_); + return non_relative_linker_patch_count_; +} + void CompilerDriver::AddRequiresConstructorBarrier(Thread* self, const DexFile* dex_file, uint16_t class_def_index) { WriterMutexLock mu(self, freezing_constructor_lock_); diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index e7bd35776a..c445683500 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -169,6 +169,8 @@ class CompilerDriver { CompiledMethod* GetCompiledMethod(MethodReference ref) const LOCKS_EXCLUDED(compiled_methods_lock_); + size_t GetNonRelativeLinkerPatchCount() const + LOCKS_EXCLUDED(compiled_methods_lock_); void AddRequiresConstructorBarrier(Thread* self, const DexFile* dex_file, uint16_t class_def_index); @@ -313,43 +315,6 @@ class CompilerDriver { const VerifiedMethod* GetVerifiedMethod(const DexFile* dex_file, uint32_t method_idx) const; bool IsSafeCast(const DexCompilationUnit* mUnit, uint32_t dex_pc); - // Record patch information for later fix up. 
- void AddCodePatch(const DexFile* dex_file, - uint16_t referrer_class_def_idx, - uint32_t referrer_method_idx, - InvokeType referrer_invoke_type, - uint32_t target_method_idx, - const DexFile* target_dex_file, - InvokeType target_invoke_type, - size_t literal_offset) - LOCKS_EXCLUDED(compiled_methods_lock_); - void AddRelativeCodePatch(const DexFile* dex_file, - uint16_t referrer_class_def_idx, - uint32_t referrer_method_idx, - InvokeType referrer_invoke_type, - uint32_t target_method_idx, - const DexFile* target_dex_file, - InvokeType target_invoke_type, - size_t literal_offset, - int32_t pc_relative_offset) - LOCKS_EXCLUDED(compiled_methods_lock_); - void AddMethodPatch(const DexFile* dex_file, - uint16_t referrer_class_def_idx, - uint32_t referrer_method_idx, - InvokeType referrer_invoke_type, - uint32_t target_method_idx, - const DexFile* target_dex_file, - InvokeType target_invoke_type, - size_t literal_offset) - LOCKS_EXCLUDED(compiled_methods_lock_); - void AddClassPatch(const DexFile* dex_file, - uint16_t referrer_class_def_idx, - uint32_t referrer_method_idx, - uint32_t target_method_idx, - const DexFile* target_dex_file, - size_t literal_offset) - LOCKS_EXCLUDED(compiled_methods_lock_); - bool GetSupportBootImageFixup() const { return support_boot_image_fixup_; } @@ -386,9 +351,6 @@ class CompilerDriver { return thread_count_; } - class CallPatchInformation; - class TypePatchInformation; - bool GetDumpPasses() const { return dump_passes_; } @@ -397,189 +359,6 @@ class CompilerDriver { return timings_logger_; } - class PatchInformation { - public: - const DexFile& GetDexFile() const { - return *dex_file_; - } - uint16_t GetReferrerClassDefIdx() const { - return referrer_class_def_idx_; - } - uint32_t GetReferrerMethodIdx() const { - return referrer_method_idx_; - } - size_t GetLiteralOffset() const { - return literal_offset_; - } - - virtual bool IsCall() const { - return false; - } - virtual bool IsType() const { - return false; - } - virtual const CallPatchInformation* AsCall() const { - LOG(FATAL) << "Unreachable"; - return nullptr; - } - virtual const TypePatchInformation* AsType() const { - LOG(FATAL) << "Unreachable"; - return nullptr; - } - - protected: - PatchInformation(const DexFile* dex_file, - uint16_t referrer_class_def_idx, - uint32_t referrer_method_idx, - size_t literal_offset) - : dex_file_(dex_file), - referrer_class_def_idx_(referrer_class_def_idx), - referrer_method_idx_(referrer_method_idx), - literal_offset_(literal_offset) { - CHECK(dex_file_ != nullptr); - } - virtual ~PatchInformation() {} - - const DexFile* const dex_file_; - const uint16_t referrer_class_def_idx_; - const uint32_t referrer_method_idx_; - const size_t literal_offset_; - - friend class CompilerDriver; - }; - - class CallPatchInformation : public PatchInformation { - public: - InvokeType GetReferrerInvokeType() const { - return referrer_invoke_type_; - } - uint32_t GetTargetMethodIdx() const { - return target_method_idx_; - } - const DexFile* GetTargetDexFile() const { - return target_dex_file_; - } - InvokeType GetTargetInvokeType() const { - return target_invoke_type_; - } - - const CallPatchInformation* AsCall() const { - return this; - } - bool IsCall() const { - return true; - } - virtual bool IsRelative() const { - return false; - } - virtual int RelativeOffset() const { - return 0; - } - - protected: - CallPatchInformation(const DexFile* dex_file, - uint16_t referrer_class_def_idx, - uint32_t referrer_method_idx, - InvokeType referrer_invoke_type, - uint32_t target_method_idx, - 
const DexFile* target_dex_file, - InvokeType target_invoke_type, - size_t literal_offset) - : PatchInformation(dex_file, referrer_class_def_idx, - referrer_method_idx, literal_offset), - referrer_invoke_type_(referrer_invoke_type), - target_method_idx_(target_method_idx), - target_dex_file_(target_dex_file), - target_invoke_type_(target_invoke_type) { - } - - private: - const InvokeType referrer_invoke_type_; - const uint32_t target_method_idx_; - const DexFile* target_dex_file_; - const InvokeType target_invoke_type_; - - friend class CompilerDriver; - DISALLOW_COPY_AND_ASSIGN(CallPatchInformation); - }; - - class RelativeCallPatchInformation : public CallPatchInformation { - public: - bool IsRelative() const { - return true; - } - int RelativeOffset() const { - return offset_; - } - - private: - RelativeCallPatchInformation(const DexFile* dex_file, - uint16_t referrer_class_def_idx, - uint32_t referrer_method_idx, - InvokeType referrer_invoke_type, - uint32_t target_method_idx, - const DexFile* target_dex_file, - InvokeType target_invoke_type, - size_t literal_offset, - int32_t pc_relative_offset) - : CallPatchInformation(dex_file, referrer_class_def_idx, - referrer_method_idx, referrer_invoke_type, target_method_idx, - target_dex_file, target_invoke_type, literal_offset), - offset_(pc_relative_offset) { - } - - const int offset_; - - friend class CompilerDriver; - DISALLOW_COPY_AND_ASSIGN(RelativeCallPatchInformation); - }; - - class TypePatchInformation : public PatchInformation { - public: - const DexFile& GetTargetTypeDexFile() const { - return *target_type_dex_file_; - } - - uint32_t GetTargetTypeIdx() const { - return target_type_idx_; - } - - bool IsType() const { - return true; - } - const TypePatchInformation* AsType() const { - return this; - } - - private: - TypePatchInformation(const DexFile* dex_file, - uint16_t referrer_class_def_idx, - uint32_t referrer_method_idx, - uint32_t target_type_idx, - const DexFile* target_type_dex_file, - size_t literal_offset) - : PatchInformation(dex_file, referrer_class_def_idx, - referrer_method_idx, literal_offset), - target_type_idx_(target_type_idx), target_type_dex_file_(target_type_dex_file) { - } - - const uint32_t target_type_idx_; - const DexFile* target_type_dex_file_; - - friend class CompilerDriver; - DISALLOW_COPY_AND_ASSIGN(TypePatchInformation); - }; - - const std::vector<const CallPatchInformation*>& GetCodeToPatch() const { - return code_to_patch_; - } - const std::vector<const CallPatchInformation*>& GetMethodsToPatch() const { - return methods_to_patch_; - } - const std::vector<const TypePatchInformation*>& GetClassesToPatch() const { - return classes_to_patch_; - } - // Checks if class specified by type_idx is one of the image_classes_ bool IsImageClass(const char* descriptor) const; @@ -689,10 +468,6 @@ class CompilerDriver { static void CompileClass(const ParallelCompilationManager* context, size_t class_def_index) LOCKS_EXCLUDED(Locks::mutator_lock_); - std::vector<const CallPatchInformation*> code_to_patch_; - std::vector<const CallPatchInformation*> methods_to_patch_; - std::vector<const TypePatchInformation*> classes_to_patch_; - const CompilerOptions* const compiler_options_; VerificationResults* const verification_results_; DexFileToMethodInlinerMap* const method_inliner_map_; @@ -715,6 +490,9 @@ class CompilerDriver { // All method references that this compiler has compiled. 
mutable Mutex compiled_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; MethodTable compiled_methods_ GUARDED_BY(compiled_methods_lock_); + // Number of non-relative patches in all compiled methods. These patches need space + // in the .oat_patches ELF section if requested in the compiler options. + size_t non_relative_linker_patch_count_ GUARDED_BY(compiled_methods_lock_); const bool image_; diff --git a/compiler/driver/dex_compilation_unit.cc b/compiler/driver/dex_compilation_unit.cc index 840b0adf49..986fc719fe 100644 --- a/compiler/driver/dex_compilation_unit.cc +++ b/compiler/driver/dex_compilation_unit.cc @@ -23,18 +23,6 @@ namespace art { -DexCompilationUnit::DexCompilationUnit(CompilationUnit* cu) - : cu_(cu), - class_loader_(cu->class_loader), - class_linker_(cu->class_linker), - dex_file_(cu->dex_file), - code_item_(cu->code_item), - class_def_idx_(cu->class_def_idx), - dex_method_idx_(cu->method_idx), - access_flags_(cu->access_flags), - verified_method_(cu_->compiler_driver->GetVerifiedMethod(cu->dex_file, cu->method_idx)) { -} - DexCompilationUnit::DexCompilationUnit(CompilationUnit* cu, jobject class_loader, ClassLinker* class_linker, diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h index 35320f5bc8..3be2478e49 100644 --- a/compiler/elf_builder.h +++ b/compiler/elf_builder.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_ELF_BUILDER_H_ #define ART_COMPILER_ELF_BUILDER_H_ +#include "base/stl_util.h" #include "buffered_output_stream.h" #include "elf_utils.h" #include "file_output_stream.h" @@ -354,12 +355,126 @@ class ElfSymtabBuilder : public ElfSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> ElfSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> strtab_; }; +template <typename Elf_Word> +class ElfFilePiece { + public: + virtual ~ElfFilePiece() {} + + virtual bool Write(File* elf_file) { + if (static_cast<off_t>(offset_) != lseek(elf_file->Fd(), offset_, SEEK_SET)) { + PLOG(ERROR) << "Failed to seek to " << GetDescription() << " offset " << offset_ << " for " + << elf_file->GetPath(); + return false; + } + + return DoActualWrite(elf_file); + } + + static bool Compare(ElfFilePiece* a, ElfFilePiece* b) { + return a->offset_ < b->offset_; + } + + protected: + explicit ElfFilePiece(Elf_Word offset) : offset_(offset) {} + + virtual std::string GetDescription() = 0; + virtual bool DoActualWrite(File* elf_file) = 0; + + Elf_Word offset_; +}; + +template <typename Elf_Word> +class ElfFileMemoryPiece : public ElfFilePiece<Elf_Word> { + public: + ElfFileMemoryPiece(const std::string& name, Elf_Word offset, const void* data, Elf_Word size) + : ElfFilePiece<Elf_Word>(offset), dbg_name_(name), data_(data), size_(size) {} + + bool DoActualWrite(File* elf_file) OVERRIDE { + DCHECK(data_ != nullptr || size_ == 0U) << dbg_name_ << " " << size_; + + if (!elf_file->WriteFully(data_, size_)) { + PLOG(ERROR) << "Failed to write " << dbg_name_ << " for " << elf_file->GetPath(); + return false; + } + + return true; + } + + std::string GetDescription() OVERRIDE { + return dbg_name_; + } + + private: + const std::string& dbg_name_; + const void *data_; + Elf_Word size_; +}; + class CodeOutput { public: + virtual void SetCodeOffset(size_t offset) = 0; virtual bool Write(OutputStream* out) = 0; virtual ~CodeOutput() {} }; +template <typename Elf_Word> +class ElfFileRodataPiece : public ElfFilePiece<Elf_Word> { + public: + ElfFileRodataPiece(Elf_Word offset, CodeOutput* output) : ElfFilePiece<Elf_Word>(offset), + output_(output) {} + + bool DoActualWrite(File* elf_file) OVERRIDE { + 
output_->SetCodeOffset(this->offset_); + std::unique_ptr<BufferedOutputStream> output_stream( + new BufferedOutputStream(new FileOutputStream(elf_file))); + if (!output_->Write(output_stream.get())) { + PLOG(ERROR) << "Failed to write .rodata and .text for " << elf_file->GetPath(); + return false; + } + + return true; + } + + std::string GetDescription() OVERRIDE { + return ".rodata"; + } + + private: + CodeOutput* output_; +}; + +template <typename Elf_Word> +class ElfFileOatTextPiece : public ElfFilePiece<Elf_Word> { + public: + ElfFileOatTextPiece(Elf_Word offset, CodeOutput* output) : ElfFilePiece<Elf_Word>(offset), + output_(output) {} + + bool DoActualWrite(File* elf_file) OVERRIDE { + // All data is written by the ElfFileRodataPiece right now, as the oat writer writes in one + // piece. This is for future flexibility. + UNUSED(output_); + return true; + } + + std::string GetDescription() OVERRIDE { + return ".text"; + } + + private: + CodeOutput* output_; +}; + +template <typename Elf_Word> +static bool WriteOutFile(const std::vector<ElfFilePiece<Elf_Word>*>& pieces, File* elf_file) { + // TODO It would be nice if this checked for overlap. + for (auto it = pieces.begin(); it != pieces.end(); ++it) { + if (!(*it)->Write(elf_file)) { + return false; + } + } + return true; +} + template <typename Elf_Word, typename Elf_Shdr> static inline constexpr Elf_Word NextOffset(const Elf_Shdr& cur, const Elf_Shdr& prev) { return RoundUp(prev.sh_size + prev.sh_offset, cur.sh_addralign); @@ -667,7 +782,7 @@ class ElfBuilder FINAL { } bool Write() { - std::vector<ElfFilePiece> pieces; + std::vector<ElfFilePiece<Elf_Word>*> pieces; Elf_Shdr prev = dynamic_builder_.section_; std::string strtab; @@ -746,8 +861,9 @@ class ElfBuilder FINAL { it->section_.sh_addr = 0; it->section_.sh_size = it->GetBuffer()->size(); it->section_.sh_link = it->GetLink(); - pieces.push_back(ElfFilePiece(it->name_, it->section_.sh_offset, - it->GetBuffer()->data(), it->GetBuffer()->size())); + + // We postpone adding an ElfFilePiece to keep the order in "pieces." + prev = it->section_; if (debug_logging_) { LOG(INFO) << it->name_ << " off=" << it->section_.sh_offset @@ -824,55 +940,62 @@ class ElfBuilder FINAL { elf_header_.e_shstrndx = shstrtab_builder_.section_index_; // Add the rest of the pieces to the list. 
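The ElfFilePiece classes above replace the old plain struct: the base class owns the file offset and the seek, and subclasses only implement DoActualWrite(), so memory-backed sections and the oat-writer-backed .rodata/.text can all be written through one loop. A self-contained sketch of the same shape using stdio and unique_ptr ownership in place of ART's File and manual STLDeleteElements():

    #include <cstdio>
    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    // Each piece knows where it belongs in the output file; Write() does the
    // seek and delegates the payload to the subclass.
    class FilePiece {
     public:
      explicit FilePiece(long offset) : offset_(offset) {}
      virtual ~FilePiece() {}

      bool Write(std::FILE* file) const {
        if (std::fseek(file, offset_, SEEK_SET) != 0) {
          return false;  // Couldn't reach the piece's offset.
        }
        return DoActualWrite(file);
      }

     protected:
      virtual bool DoActualWrite(std::FILE* file) const = 0;

      long offset_;
    };

    // A piece backed by an in-memory buffer (headers, string tables, ...).
    class MemoryPiece : public FilePiece {
     public:
      MemoryPiece(long offset, std::string data)
          : FilePiece(offset), data_(std::move(data)) {}

     protected:
      bool DoActualWrite(std::FILE* file) const override {
        return std::fwrite(data_.data(), 1, data_.size(), file) == data_.size();
      }

     private:
      std::string data_;
    };

    int main() {
      // unique_ptr ownership replaces the manual STLDeleteElements() cleanup.
      std::vector<std::unique_ptr<FilePiece>> pieces;
      pieces.push_back(std::unique_ptr<FilePiece>(new MemoryPiece(0, "HEADER")));
      pieces.push_back(std::unique_ptr<FilePiece>(new MemoryPiece(16, "SECTION TABLE")));

      std::FILE* file = std::fopen("pieces.bin", "wb");
      bool ok = (file != nullptr);
      for (const auto& piece : pieces) {
        ok = ok && piece->Write(file);  // Pieces can be written in any order.
      }
      if (file != nullptr) {
        std::fclose(file);
      }
      return ok ? 0 : 1;
    }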
- pieces.push_back(ElfFilePiece("Elf Header", 0, &elf_header_, sizeof(elf_header_))); - pieces.push_back(ElfFilePiece("Program headers", PHDR_OFFSET, - &program_headers_, sizeof(program_headers_))); - pieces.push_back(ElfFilePiece(".dynamic", dynamic_builder_.section_.sh_offset, - dynamic.data(), dynamic_builder_.section_.sh_size)); - pieces.push_back(ElfFilePiece(".dynsym", dynsym_builder_.section_.sh_offset, - dynsym.data(), dynsym.size() * sizeof(Elf_Sym))); - pieces.push_back(ElfFilePiece(".dynstr", dynsym_builder_.GetStrTab()->section_.sh_offset, - dynstr_.c_str(), dynstr_.size())); - pieces.push_back(ElfFilePiece(".hash", hash_builder_.section_.sh_offset, - hash_.data(), hash_.size() * sizeof(Elf_Word))); - pieces.push_back(ElfFilePiece(".rodata", rodata_builder_.section_.sh_offset, - nullptr, rodata_builder_.section_.sh_size)); - pieces.push_back(ElfFilePiece(".text", text_builder_.section_.sh_offset, - nullptr, text_builder_.section_.sh_size)); + pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Elf Header", 0, &elf_header_, + sizeof(elf_header_))); + pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers", PHDR_OFFSET, + &program_headers_, sizeof(program_headers_))); + pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynamic", + dynamic_builder_.section_.sh_offset, + dynamic.data(), + dynamic_builder_.section_.sh_size)); + pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynsym", dynsym_builder_.section_.sh_offset, + dynsym.data(), + dynsym.size() * sizeof(Elf_Sym))); + pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynstr", + dynsym_builder_.GetStrTab()->section_.sh_offset, + dynstr_.c_str(), dynstr_.size())); + pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".hash", hash_builder_.section_.sh_offset, + hash_.data(), + hash_.size() * sizeof(Elf_Word))); + pieces.push_back(new ElfFileRodataPiece<Elf_Word>(rodata_builder_.section_.sh_offset, + oat_writer_)); + pieces.push_back(new ElfFileOatTextPiece<Elf_Word>(text_builder_.section_.sh_offset, + oat_writer_)); if (IncludingDebugSymbols()) { - pieces.push_back(ElfFilePiece(".symtab", symtab_builder_.section_.sh_offset, - symtab.data(), symtab.size() * sizeof(Elf_Sym))); - pieces.push_back(ElfFilePiece(".strtab", symtab_builder_.GetStrTab()->section_.sh_offset, - strtab.c_str(), strtab.size())); + pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".symtab", + symtab_builder_.section_.sh_offset, + symtab.data(), + symtab.size() * sizeof(Elf_Sym))); + pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".strtab", + symtab_builder_.GetStrTab()->section_.sh_offset, + strtab.c_str(), strtab.size())); } - pieces.push_back(ElfFilePiece(".shstrtab", shstrtab_builder_.section_.sh_offset, - &shstrtab_[0], shstrtab_.size())); + pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".shstrtab", + shstrtab_builder_.section_.sh_offset, + &shstrtab_[0], shstrtab_.size())); for (uint32_t i = 0; i < section_ptrs_.size(); ++i) { // Just add all the sections in induvidually since they are all over the // place on the heap/stack. Elf_Word cur_off = sections_offset + i * sizeof(Elf_Shdr); - pieces.push_back(ElfFilePiece("section table piece", cur_off, - section_ptrs_[i], sizeof(Elf_Shdr))); + pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("section table piece", cur_off, + section_ptrs_[i], sizeof(Elf_Shdr))); + } + + // Postponed debug info. 
+ for (auto it = other_builders_.begin(); it != other_builders_.end(); ++it) { + pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(it->name_, it->section_.sh_offset, + it->GetBuffer()->data(), + it->GetBuffer()->size())); } if (!WriteOutFile(pieces)) { LOG(ERROR) << "Unable to write to file " << elf_file_->GetPath(); - return false; - } - // write out the actual oat file data. - Elf_Word oat_data_offset = rodata_builder_.section_.sh_offset; - if (static_cast<off_t>(oat_data_offset) != lseek(elf_file_->Fd(), oat_data_offset, SEEK_SET)) { - PLOG(ERROR) << "Failed to seek to .rodata offset " << oat_data_offset - << " for " << elf_file_->GetPath(); - return false; - } - std::unique_ptr<BufferedOutputStream> output_stream( - new BufferedOutputStream(new FileOutputStream(elf_file_))); - if (!oat_writer_->Write(output_stream.get())) { - PLOG(ERROR) << "Failed to write .rodata and .text for " << elf_file_->GetPath(); + + STLDeleteElements(&pieces); // Have to manually clean pieces. return false; } + STLDeleteElements(&pieces); // Have to manually clean pieces. return true; } @@ -1028,34 +1151,12 @@ class ElfBuilder FINAL { } } - struct ElfFilePiece { - ElfFilePiece(const std::string& name, Elf_Word offset, const void* data, Elf_Word size) - : dbg_name_(name), offset_(offset), data_(data), size_(size) {} - ~ElfFilePiece() {} - - const std::string& dbg_name_; - Elf_Word offset_; - const void *data_; - Elf_Word size_; - static bool Compare(ElfFilePiece a, ElfFilePiece b) { - return a.offset_ < b.offset_; - } - }; // Write each of the pieces out to the file. - bool WriteOutFile(const std::vector<ElfFilePiece>& pieces) { - // TODO It would be nice if this checked for overlap. + bool WriteOutFile(const std::vector<ElfFilePiece<Elf_Word>*>& pieces) { for (auto it = pieces.begin(); it != pieces.end(); ++it) { - if (it->data_) { - if (static_cast<off_t>(it->offset_) != lseek(elf_file_->Fd(), it->offset_, SEEK_SET)) { - PLOG(ERROR) << "Failed to seek to " << it->dbg_name_ << " offset location " - << it->offset_ << " for " << elf_file_->GetPath(); - return false; - } - if (!elf_file_->WriteFully(it->data_, it->size_)) { - PLOG(ERROR) << "Failed to write " << it->dbg_name_ << " for " << elf_file_->GetPath(); - return false; - } + if (!(*it)->Write(elf_file_)) { + return false; } } return true; diff --git a/compiler/elf_patcher.cc b/compiler/elf_patcher.cc deleted file mode 100644 index 0646b75f37..0000000000 --- a/compiler/elf_patcher.cc +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "elf_patcher.h" - -#include <vector> -#include <set> - -#include "class_linker.h" -#include "elf_file.h" -#include "elf_utils.h" -#include "mirror/art_field-inl.h" -#include "mirror/art_method-inl.h" -#include "mirror/array-inl.h" -#include "mirror/class-inl.h" -#include "mirror/class_loader.h" -#include "mirror/dex_cache-inl.h" -#include "mirror/object-inl.h" -#include "mirror/object_array-inl.h" -#include "mirror/string-inl.h" -#include "oat.h" -#include "os.h" -#include "utils.h" - -namespace art { - -bool ElfPatcher::Patch(const CompilerDriver* driver, ElfFile* elf_file, - const std::string& oat_location, - ImageAddressCallback cb, void* cb_data, - std::string* error_msg) { - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - const OatFile* oat_file = class_linker->FindOpenedOatFileFromOatLocation(oat_location); - if (oat_file == nullptr) { - CHECK(Runtime::Current()->IsCompiler()); - oat_file = OatFile::Open(oat_location, oat_location, NULL, false, error_msg); - if (oat_file == nullptr) { - *error_msg = StringPrintf("Unable to find or open oat file at '%s': %s", oat_location.c_str(), - error_msg->c_str()); - return false; - } - CHECK_EQ(class_linker->RegisterOatFile(oat_file), oat_file); - } - return ElfPatcher::Patch(driver, elf_file, oat_file, - reinterpret_cast<uintptr_t>(oat_file->Begin()), cb, cb_data, error_msg); -} - -bool ElfPatcher::Patch(const CompilerDriver* driver, ElfFile* elf, const OatFile* oat_file, - uintptr_t oat_data_start, ImageAddressCallback cb, void* cb_data, - std::string* error_msg) { - Elf32_Shdr* data_sec = elf->FindSectionByName(".rodata"); - if (data_sec == nullptr) { - *error_msg = "Unable to find .rodata section and oat header"; - return false; - } - OatHeader* oat_header = reinterpret_cast<OatHeader*>(elf->Begin() + data_sec->sh_offset); - if (!oat_header->IsValid()) { - *error_msg = "Oat header was not valid"; - return false; - } - - ElfPatcher p(driver, elf, oat_file, oat_header, oat_data_start, cb, cb_data, error_msg); - return p.PatchElf(); -} - -mirror::ArtMethod* ElfPatcher::GetTargetMethod(const CompilerDriver::CallPatchInformation* patch) { - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - StackHandleScope<1> hs(Thread::Current()); - Handle<mirror::DexCache> dex_cache( - hs.NewHandle(class_linker->FindDexCache(*patch->GetTargetDexFile()))); - mirror::ArtMethod* method = class_linker->ResolveMethod(*patch->GetTargetDexFile(), - patch->GetTargetMethodIdx(), - dex_cache, - NullHandle<mirror::ClassLoader>(), - NullHandle<mirror::ArtMethod>(), - patch->GetTargetInvokeType()); - CHECK(method != NULL) - << patch->GetTargetDexFile()->GetLocation() << " " << patch->GetTargetMethodIdx(); - CHECK(!method->IsRuntimeMethod()) - << patch->GetTargetDexFile()->GetLocation() << " " << patch->GetTargetMethodIdx(); - CHECK(dex_cache->GetResolvedMethods()->Get(patch->GetTargetMethodIdx()) == method) - << patch->GetTargetDexFile()->GetLocation() << " " << patch->GetReferrerMethodIdx() << " " - << PrettyMethod(dex_cache->GetResolvedMethods()->Get(patch->GetTargetMethodIdx())) << " " - << PrettyMethod(method); - return method; -} - -mirror::Class* ElfPatcher::GetTargetType(const CompilerDriver::TypePatchInformation* patch) { - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - StackHandleScope<2> hs(Thread::Current()); - Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache( - patch->GetTargetTypeDexFile()))); - mirror::Class* klass = 
class_linker->ResolveType(patch->GetTargetTypeDexFile(), - patch->GetTargetTypeIdx(), - dex_cache, NullHandle<mirror::ClassLoader>()); - CHECK(klass != NULL) - << patch->GetTargetTypeDexFile().GetLocation() << " " << patch->GetTargetTypeIdx(); - CHECK(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx()) == klass) - << patch->GetDexFile().GetLocation() << " " << patch->GetReferrerMethodIdx() << " " - << PrettyClass(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx())) << " " - << PrettyClass(klass); - return klass; -} - -void ElfPatcher::AddPatch(uintptr_t p) { - if (write_patches_ && patches_set_.find(p) == patches_set_.end()) { - patches_set_.insert(p); - patches_.push_back(p); - } -} - -uint32_t* ElfPatcher::GetPatchLocation(uintptr_t patch_ptr) { - CHECK_GE(patch_ptr, reinterpret_cast<uintptr_t>(oat_file_->Begin())); - CHECK_LE(patch_ptr, reinterpret_cast<uintptr_t>(oat_file_->End())); - uintptr_t off = patch_ptr - reinterpret_cast<uintptr_t>(oat_file_->Begin()); - uintptr_t ret = reinterpret_cast<uintptr_t>(oat_header_) + off; - - CHECK_GE(ret, reinterpret_cast<uintptr_t>(elf_file_->Begin())); - CHECK_LT(ret, reinterpret_cast<uintptr_t>(elf_file_->End())); - return reinterpret_cast<uint32_t*>(ret); -} - -void ElfPatcher::SetPatchLocation(const CompilerDriver::PatchInformation* patch, uint32_t value) { - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - const void* quick_oat_code = class_linker->GetQuickOatCodeFor(patch->GetDexFile(), - patch->GetReferrerClassDefIdx(), - patch->GetReferrerMethodIdx()); - // TODO: make this Thumb2 specific - uint8_t* base = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(quick_oat_code) & ~0x1); - uintptr_t patch_ptr = reinterpret_cast<uintptr_t>(base + patch->GetLiteralOffset()); - uint32_t* patch_location = GetPatchLocation(patch_ptr); - if (kIsDebugBuild) { - if (patch->IsCall()) { - const CompilerDriver::CallPatchInformation* cpatch = patch->AsCall(); - const DexFile::MethodId& id = - cpatch->GetTargetDexFile()->GetMethodId(cpatch->GetTargetMethodIdx()); - uint32_t expected = reinterpret_cast<uintptr_t>(&id) & 0xFFFFFFFF; - uint32_t actual = *patch_location; - CHECK(actual == expected || actual == value) << "Patching call failed: " << std::hex - << " actual=" << actual - << " expected=" << expected - << " value=" << value; - } - if (patch->IsType()) { - const CompilerDriver::TypePatchInformation* tpatch = patch->AsType(); - const DexFile::TypeId& id = tpatch->GetTargetTypeDexFile().GetTypeId(tpatch->GetTargetTypeIdx()); - uint32_t expected = reinterpret_cast<uintptr_t>(&id) & 0xFFFFFFFF; - uint32_t actual = *patch_location; - CHECK(actual == expected || actual == value) << "Patching type failed: " << std::hex - << " actual=" << actual - << " expected=" << expected - << " value=" << value; - } - } - *patch_location = value; - oat_header_->UpdateChecksum(patch_location, sizeof(value)); - - if (patch->IsCall() && patch->AsCall()->IsRelative()) { - // We never record relative patches. - return; - } - uintptr_t loc = patch_ptr - (reinterpret_cast<uintptr_t>(oat_file_->Begin()) + - oat_header_->GetExecutableOffset()); - CHECK_GT(patch_ptr, reinterpret_cast<uintptr_t>(oat_file_->Begin()) + - oat_header_->GetExecutableOffset()); - CHECK_LT(loc, oat_file_->Size() - oat_header_->GetExecutableOffset()); - AddPatch(loc); -} - -bool ElfPatcher::PatchElf() { - // TODO if we are adding patches the resulting ELF file might have a - // potentially rather large amount of free space where patches might have been - // placed. 
We should adjust the ELF file to get rid of this excess space. - if (write_patches_) { - patches_.reserve(compiler_driver_->GetCodeToPatch().size() + - compiler_driver_->GetMethodsToPatch().size() + - compiler_driver_->GetClassesToPatch().size()); - } - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ScopedAssertNoThreadSuspension ants(Thread::Current(), "ElfPatcher"); - - typedef std::vector<const CompilerDriver::CallPatchInformation*> CallPatches; - const CallPatches& code_to_patch = compiler_driver_->GetCodeToPatch(); - for (size_t i = 0; i < code_to_patch.size(); i++) { - const CompilerDriver::CallPatchInformation* patch = code_to_patch[i]; - - mirror::ArtMethod* target = GetTargetMethod(patch); - uintptr_t quick_code = reinterpret_cast<uintptr_t>(class_linker->GetQuickOatCodeFor(target)); - DCHECK_NE(quick_code, 0U) << PrettyMethod(target); - const OatFile* target_oat = - class_linker->FindOpenedOatDexFileForDexFile(*patch->GetTargetDexFile())->GetOatFile(); - // Get where the data actually starts. if target is this oat_file_ it is oat_data_start_, - // otherwise it is wherever target_oat is loaded. - uintptr_t oat_data_addr = GetBaseAddressFor(target_oat); - uintptr_t code_base = reinterpret_cast<uintptr_t>(target_oat->Begin()); - uintptr_t code_offset = quick_code - code_base; - bool is_quick_offset = false; - if (quick_code == reinterpret_cast<uintptr_t>(GetQuickToInterpreterBridge())) { - is_quick_offset = true; - code_offset = oat_header_->GetQuickToInterpreterBridgeOffset(); - } else if (quick_code == - reinterpret_cast<uintptr_t>(class_linker->GetQuickGenericJniTrampoline())) { - CHECK(target->IsNative()); - is_quick_offset = true; - code_offset = oat_header_->GetQuickGenericJniTrampolineOffset(); - } - uintptr_t value; - if (patch->IsRelative()) { - // value to patch is relative to the location being patched - const void* quick_oat_code = - class_linker->GetQuickOatCodeFor(patch->GetDexFile(), - patch->GetReferrerClassDefIdx(), - patch->GetReferrerMethodIdx()); - if (is_quick_offset) { - // If its a quick offset it means that we are doing a relative patch from the class linker - // oat_file to the elf_patcher oat_file so we need to adjust the quick oat code to be the - // one in the output oat_file (ie where it is actually going to be loaded). 
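The adjustment described in the comment above amounts to rebasing: addresses observed in the oat file currently loaded by the compiler are carried over to the address range the output oat file will occupy at runtime, and the patched value is then formed relative to the patch location, as the deleted code just below does. A tiny standalone illustration of that arithmetic, with made-up addresses:

#include <cassert>
#include <cstdint>

// Rebase an address from the mapping it was observed in to the mapping it
// will really have at runtime: keep the offset, swap the base.
uint32_t Rebase(uint32_t addr, uint32_t old_base, uint32_t new_base) {
  return addr - old_base + new_base;
}

int main() {
  // Code seen at 0x40001000 in a file mapped at 0x40000000, but destined to
  // load at 0x70000000, ends up at 0x70001000.
  uint32_t callee = Rebase(0x40001000u, 0x40000000u, 0x70000000u);
  assert(callee == 0x70001000u);
  // The patched value is then relative to the patch location, mirroring
  // value = quick_code - patch_location + RelativeOffset() in the source.
  uint32_t patch_location = 0x70000800u;
  uint32_t value = callee - patch_location;  // plus the patch's RelativeOffset()
  assert(value == 0x800u);
  return 0;
}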
- quick_code = PointerToLowMemUInt32(reinterpret_cast<void*>(oat_data_addr + code_offset)); - quick_oat_code = - reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(quick_oat_code) + - oat_data_addr - code_base); - } - uintptr_t base = reinterpret_cast<uintptr_t>(quick_oat_code); - uintptr_t patch_location = base + patch->GetLiteralOffset(); - value = quick_code - patch_location + patch->RelativeOffset(); - } else if (code_offset != 0) { - value = PointerToLowMemUInt32(reinterpret_cast<void*>(oat_data_addr + code_offset)); - } else { - value = 0; - } - SetPatchLocation(patch, value); - } - - const CallPatches& methods_to_patch = compiler_driver_->GetMethodsToPatch(); - for (size_t i = 0; i < methods_to_patch.size(); i++) { - const CompilerDriver::CallPatchInformation* patch = methods_to_patch[i]; - mirror::ArtMethod* target = GetTargetMethod(patch); - SetPatchLocation(patch, PointerToLowMemUInt32(get_image_address_(cb_data_, target))); - } - - const std::vector<const CompilerDriver::TypePatchInformation*>& classes_to_patch = - compiler_driver_->GetClassesToPatch(); - for (size_t i = 0; i < classes_to_patch.size(); i++) { - const CompilerDriver::TypePatchInformation* patch = classes_to_patch[i]; - mirror::Class* target = GetTargetType(patch); - SetPatchLocation(patch, PointerToLowMemUInt32(get_image_address_(cb_data_, target))); - } - - if (write_patches_) { - return WriteOutPatchData(); - } - return true; -} - -bool ElfPatcher::WriteOutPatchData() { - Elf32_Shdr* shdr = elf_file_->FindSectionByName(".oat_patches"); - if (shdr != nullptr) { - CHECK_EQ(shdr, elf_file_->FindSectionByType(SHT_OAT_PATCH)) - << "Incorrect type for .oat_patches section"; - CHECK_LE(patches_.size() * sizeof(uintptr_t), shdr->sh_size) - << "We got more patches than anticipated"; - CHECK_LE(reinterpret_cast<uintptr_t>(elf_file_->Begin()) + shdr->sh_offset + shdr->sh_size, - reinterpret_cast<uintptr_t>(elf_file_->End())) << "section is too large"; - CHECK(shdr == elf_file_->GetSectionHeader(elf_file_->GetSectionHeaderNum() - 1) || - shdr->sh_offset + shdr->sh_size <= (shdr + 1)->sh_offset) - << "Section overlaps onto next section"; - // It's mmap'd so we can just memcpy. - memcpy(elf_file_->Begin() + shdr->sh_offset, patches_.data(), - patches_.size() * sizeof(uintptr_t)); - // TODO We should fill in the newly empty space between the last patch and - // the start of the next section by moving the following sections down if - // possible. - shdr->sh_size = patches_.size() * sizeof(uintptr_t); - return true; - } else { - LOG(ERROR) << "Unable to find section header for SHT_OAT_PATCH"; - *error_msg_ = "Unable to find section to write patch information to in "; - *error_msg_ += elf_file_->GetFile().GetPath(); - return false; - } -} - -} // namespace art diff --git a/compiler/elf_patcher.h b/compiler/elf_patcher.h deleted file mode 100644 index 0a9f0a013e..0000000000 --- a/compiler/elf_patcher.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_ELF_PATCHER_H_ -#define ART_COMPILER_ELF_PATCHER_H_ - -#include "base/mutex.h" -#include "driver/compiler_driver.h" -#include "elf_file.h" -#include "mirror/art_method.h" -#include "mirror/class.h" -#include "mirror/object.h" -#include "oat_file.h" -#include "oat.h" -#include "os.h" - -namespace art { - -class ElfPatcher { - public: - typedef void* (*ImageAddressCallback)(void* data, mirror::Object* obj); - - static bool Patch(const CompilerDriver* driver, ElfFile* elf_file, - const std::string& oat_location, - ImageAddressCallback cb, void* cb_data, - std::string* error_msg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - - static bool Patch(const CompilerDriver* driver, ElfFile* elf_file, - const OatFile* oat_file, uintptr_t oat_data_begin, - ImageAddressCallback cb, void* cb_data, - std::string* error_msg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - - static bool Patch(const CompilerDriver* driver, ElfFile* elf_file, - const std::string& oat_location, - std::string* error_msg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return ElfPatcher::Patch(driver, elf_file, oat_location, - DefaultImageAddressCallback, nullptr, error_msg); - } - - static bool Patch(const CompilerDriver* driver, ElfFile* elf_file, - const OatFile* oat_file, uintptr_t oat_data_begin, - std::string* error_msg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return ElfPatcher::Patch(driver, elf_file, oat_file, oat_data_begin, - DefaultImageAddressCallback, nullptr, error_msg); - } - - private: - ElfPatcher(const CompilerDriver* driver, ElfFile* elf_file, const OatFile* oat_file, - OatHeader* oat_header, uintptr_t oat_data_begin, - ImageAddressCallback cb, void* cb_data, std::string* error_msg) - : compiler_driver_(driver), elf_file_(elf_file), oat_file_(oat_file), - oat_header_(oat_header), oat_data_begin_(oat_data_begin), get_image_address_(cb), - cb_data_(cb_data), error_msg_(error_msg), - write_patches_(compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) {} - ~ElfPatcher() {} - - static void* DefaultImageAddressCallback(void* data_unused, mirror::Object* obj) { - return static_cast<void*>(obj); - } - - bool PatchElf() - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - - mirror::ArtMethod* GetTargetMethod(const CompilerDriver::CallPatchInformation* patch) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - - mirror::Class* GetTargetType(const CompilerDriver::TypePatchInformation* patch) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - - void AddPatch(uintptr_t off); - - void SetPatchLocation(const CompilerDriver::PatchInformation* patch, uint32_t value) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - - // Takes the pointer into the oat_file_ and get the pointer in to the ElfFile. - uint32_t* GetPatchLocation(uintptr_t patch_ptr); - - bool WriteOutPatchData(); - - uintptr_t GetBaseAddressFor(const OatFile* f) { - if (f == oat_file_) { - return oat_data_begin_; - } else { - return reinterpret_cast<uintptr_t>(f->Begin()); - } - } - - const CompilerDriver* compiler_driver_; - - // The elf_file containing the oat_data we are patching up - ElfFile* elf_file_; - - // The oat_file that is actually loaded. - const OatFile* oat_file_; - - // The oat_header_ within the elf_file_ - OatHeader* oat_header_; - - // Where the elf_file will be loaded during normal runs. - uintptr_t oat_data_begin_; - - // Callback to get image addresses. 
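The deleted header above relied on a plain C-style callback to map heap objects to image addresses: a function pointer plus an opaque void* context, with DefaultImageAddressCallback returning the object unchanged. A generic standalone sketch of that idiom follows; the names and the Rebaser helper are illustrative, not ART's.

#include <cassert>

// Function pointer plus opaque context, as in ElfPatcher::ImageAddressCallback.
typedef void* (*AddressCallback)(void* data, void* obj);

void* DefaultCallback(void* data_unused, void* obj) {
  return obj;  // Identity mapping, like DefaultImageAddressCallback.
}

struct Rebaser {
  char* old_base;
  char* new_base;
};

void* RebasingCallback(void* data, void* obj) {
  Rebaser* r = static_cast<Rebaser*>(data);
  return r->new_base + (static_cast<char*>(obj) - r->old_base);
}

void* Translate(void* obj, AddressCallback cb, void* cb_data) {
  return cb(cb_data, obj);  // The caller supplies both pieces, as Patch() did.
}

int main() {
  char heap[16];
  char image[16];
  Rebaser r = {heap, image};
  assert(Translate(heap + 4, DefaultCallback, nullptr) == heap + 4);
  assert(Translate(heap + 4, RebasingCallback, &r) == image + 4);
  return 0;
}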
- ImageAddressCallback get_image_address_; - void* cb_data_; - - std::string* error_msg_; - std::vector<uintptr_t> patches_; - std::set<uintptr_t> patches_set_; - bool write_patches_; - - DISALLOW_COPY_AND_ASSIGN(ElfPatcher); -}; - -} // namespace art -#endif // ART_COMPILER_ELF_PATCHER_H_ diff --git a/compiler/elf_writer_mclinker.cc b/compiler/elf_writer_mclinker.cc index 3dba426242..f0176412e1 100644 --- a/compiler/elf_writer_mclinker.cc +++ b/compiler/elf_writer_mclinker.cc @@ -67,19 +67,40 @@ bool ElfWriterMclinker::Write(OatWriter* oat_writer, bool is_host) { std::vector<uint8_t> oat_contents; oat_contents.reserve(oat_writer->GetSize()); - VectorOutputStream output_stream("oat contents", oat_contents); - CHECK(oat_writer->Write(&output_stream)); - CHECK_EQ(oat_writer->GetSize(), oat_contents.size()); Init(); - AddOatInput(oat_contents); + mcld::LDSection* oat_section = AddOatInput(oat_writer, &oat_contents); if (kUsePortableCompiler) { AddMethodInputs(dex_files); AddRuntimeInputs(android_root, is_host); } - if (!Link()) { + + // link inputs + if (!linker_->link(*module_.get(), *ir_builder_.get())) { + LOG(ERROR) << "Failed to link " << elf_file_->GetPath(); + return false; + } + + // Fill oat_contents. + VectorOutputStream output_stream("oat contents", oat_contents); + oat_writer->SetOatDataOffset(oat_section->offset()); + CHECK(oat_writer->Write(&output_stream)); + CHECK_EQ(oat_writer->GetSize(), oat_contents.size()); + + // emit linked output + // TODO: avoid dup of fd by fixing Linker::emit to not close the argument fd. + int fd = dup(elf_file_->Fd()); + if (fd == -1) { + PLOG(ERROR) << "Failed to dup file descriptor for " << elf_file_->GetPath(); + return false; + } + if (!linker_->emit(*module_.get(), fd)) { + LOG(ERROR) << "Failed to emit " << elf_file_->GetPath(); return false; } + mcld::Finalize(); + LOG(INFO) << "ELF file written successfully: " << elf_file_->GetPath(); + oat_contents.clear(); if (kUsePortableCompiler) { FixupOatMethodOffsets(dex_files); @@ -156,16 +177,13 @@ void ElfWriterMclinker::Init() { linker_->emulate(*linker_script_.get(), *linker_config_.get()); } -void ElfWriterMclinker::AddOatInput(std::vector<uint8_t>& oat_contents) { - // Add an artificial memory input. Based on LinkerTest. - std::string error_msg; - std::unique_ptr<OatFile> oat_file(OatFile::OpenMemory(oat_contents, elf_file_->GetPath(), &error_msg)); - CHECK(oat_file.get() != NULL) << elf_file_->GetPath() << ": " << error_msg; - - const char* oat_data_start = reinterpret_cast<const char*>(&oat_file->GetOatHeader()); - const size_t oat_data_length = oat_file->GetOatHeader().GetExecutableOffset(); +mcld::LDSection* ElfWriterMclinker::AddOatInput(OatWriter* oat_writer, + std::vector<uint8_t>* oat_contents) { + // NOTE: oat_contents has sufficient reserved space but it doesn't contain the data yet. + const char* oat_data_start = reinterpret_cast<const char*>(&(*oat_contents)[0]); + const size_t oat_data_length = oat_writer->GetOatHeader().GetExecutableOffset(); const char* oat_code_start = oat_data_start + oat_data_length; - const size_t oat_code_length = oat_file->Size() - oat_data_length; + const size_t oat_code_length = oat_writer->GetSize() - oat_data_length; // TODO: ownership of oat_input? oat_input_ = ir_builder_->CreateInput("oat contents", @@ -205,7 +223,7 @@ void ElfWriterMclinker::AddOatInput(std::vector<uint8_t>& oat_contents) { // TODO: why does IRBuilder::CreateRegion take a non-const pointer? 
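The inlined emit step in elf_writer_mclinker.cc above dups the ELF file descriptor before handing it to mcld, because, per the existing TODO, Linker::emit closes the descriptor it is given; duplicating first keeps elf_file_ usable afterwards. A standalone, POSIX-only sketch of that technique, with an invented callback in place of the mcld API:

#include <cstdio>
#include <unistd.h>

// Hand a library a descriptor it is allowed to close without losing our own.
// "emit" stands in for any callee that takes ownership of the fd.
bool EmitWithBorrowedFd(int our_fd, bool (*emit)(int fd)) {
  int fd = dup(our_fd);   // Independent descriptor, same open file description.
  if (fd == -1) {
    std::perror("dup");
    return false;
  }
  // The callee may close(fd); our_fd stays valid for further writes/fixups.
  return emit(fd);
}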
mcld::Fragment* text_fragment = ir_builder_->CreateRegion(const_cast<char*>(oat_data_start), - oat_file->Size()); + oat_writer->GetSize()); CHECK(text_fragment != NULL); ir_builder_->AppendFragment(*text_fragment, *text_sectiondata); @@ -236,6 +254,8 @@ void ElfWriterMclinker::AddOatInput(std::vector<uint8_t>& oat_contents) { // subtract a word so symbol is within section (oat_data_length + oat_code_length) - sizeof(uint32_t), // offset text_section); + + return text_section; } void ElfWriterMclinker::AddMethodInputs(const std::vector<const DexFile*>& dex_files) { @@ -322,29 +342,6 @@ void ElfWriterMclinker::AddRuntimeInputs(const std::string& android_root, bool i CHECK(libm_lib_input_input != NULL); } -bool ElfWriterMclinker::Link() { - // link inputs - if (!linker_->link(*module_.get(), *ir_builder_.get())) { - LOG(ERROR) << "Failed to link " << elf_file_->GetPath(); - return false; - } - - // emit linked output - // TODO: avoid dup of fd by fixing Linker::emit to not close the argument fd. - int fd = dup(elf_file_->Fd()); - if (fd == -1) { - PLOG(ERROR) << "Failed to dup file descriptor for " << elf_file_->GetPath(); - return false; - } - if (!linker_->emit(*module_.get(), fd)) { - LOG(ERROR) << "Failed to emit " << elf_file_->GetPath(); - return false; - } - mcld::Finalize(); - LOG(INFO) << "ELF file written successfully: " << elf_file_->GetPath(); - return true; -} - void ElfWriterMclinker::FixupOatMethodOffsets(const std::vector<const DexFile*>& dex_files) { std::string error_msg; std::unique_ptr<ElfFile> elf_file(ElfFile::Open(elf_file_, true, false, &error_msg)); diff --git a/compiler/elf_writer_mclinker.h b/compiler/elf_writer_mclinker.h index 955e5d2614..489fefb284 100644 --- a/compiler/elf_writer_mclinker.h +++ b/compiler/elf_writer_mclinker.h @@ -61,11 +61,10 @@ class ElfWriterMclinker FINAL : public ElfWriter { ~ElfWriterMclinker(); void Init(); - void AddOatInput(std::vector<uint8_t>& oat_contents); + mcld::LDSection* AddOatInput(OatWriter* oat_writer, std::vector<uint8_t>* oat_contents); void AddMethodInputs(const std::vector<const DexFile*>& dex_files); void AddCompiledCodeInput(const CompiledCode& compiled_code); void AddRuntimeInputs(const std::string& android_root, bool is_host); - bool Link(); void FixupOatMethodOffsets(const std::vector<const DexFile*>& dex_files) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); uint32_t FixupCompiledCodeOffset(ElfFile& elf_file, diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index dbd3a37dbb..e661324676 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -85,26 +85,6 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, return elf_writer.Write(oat_writer, dex_files, android_root, is_host); } -// Add patch information to this section. 
Each patch is a Elf_Word that -// identifies an offset from the start of the text section -static void ReservePatchSpace(const CompilerDriver* compiler_driver, std::vector<uint8_t>* buffer, - bool debug) { - size_t size = - compiler_driver->GetCodeToPatch().size() + - compiler_driver->GetMethodsToPatch().size() + - compiler_driver->GetClassesToPatch().size(); - if (size == 0) { - if (debug) { - LOG(INFO) << "No patches to record"; - } - return; - } - buffer->resize(size * sizeof(uintptr_t)); - if (debug) { - LOG(INFO) << "Patches reserved for " << size; - } -} - std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) { std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>; @@ -219,6 +199,9 @@ class OatWriterWrapper : public CodeOutput { public: explicit OatWriterWrapper(OatWriter* oat_writer) : oat_writer_(oat_writer) {} + void SetCodeOffset(size_t offset) { + oat_writer_->SetOatDataOffset(offset); + } bool Write(OutputStream* out) OVERRIDE { return oat_writer_->Write(out); } @@ -274,7 +257,13 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) { ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> oat_patches( ".oat_patches", SHT_OAT_PATCH, 0, NULL, 0, sizeof(uintptr_t), sizeof(uintptr_t)); - ReservePatchSpace(compiler_driver_, oat_patches.GetBuffer(), debug); + const std::vector<uintptr_t>& locations = oat_writer->GetAbsolutePatchLocations(); + const uint8_t* begin = reinterpret_cast<const uint8_t*>(&locations[0]); + const uint8_t* end = begin + locations.size() * sizeof(locations[0]); + oat_patches.GetBuffer()->assign(begin, end); + if (debug) { + LOG(INFO) << "Prepared .oat_patches for " << locations.size() << " patches."; + } builder->RegisterRawSection(oat_patches); } diff --git a/compiler/image_test.cc b/compiler/image_test.cc index f2a16e509f..2a37049a1d 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -62,6 +62,8 @@ TEST_F(ImageTest, WriteRead) { oat_filename += "oat"; ScratchFile oat_file(OS::CreateEmptyFile(oat_filename.c_str())); + const uintptr_t requested_image_base = ART_BASE_ADDRESS; + ImageWriter writer(*compiler_driver_, requested_image_base); { { jobject class_loader = NULL; @@ -79,15 +81,15 @@ TEST_F(ImageTest, WriteRead) { compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings); t.NewTiming("WriteElf"); - ScopedObjectAccess soa(Thread::Current()); SafeMap<std::string, std::string> key_value_store; - OatWriter oat_writer(class_linker->GetBootClassPath(), 0, 0, 0, compiler_driver_.get(), &timings, - &key_value_store); - bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(), - !kIsTargetBuild, - class_linker->GetBootClassPath(), - &oat_writer, - oat_file.GetFile()); + OatWriter oat_writer(class_linker->GetBootClassPath(), 0, 0, 0, compiler_driver_.get(), + &writer, &timings, &key_value_store); + bool success = writer.PrepareImageAddressSpace() && + compiler_driver_->WriteElf(GetTestAndroidRoot(), + !kIsTargetBuild, + class_linker->GetBootClassPath(), + &oat_writer, + oat_file.GetFile()); ASSERT_TRUE(success); } } @@ -95,11 +97,9 @@ TEST_F(ImageTest, WriteRead) { std::unique_ptr<File> dup_oat(OS::OpenFileReadWrite(oat_file.GetFilename().c_str())); ASSERT_TRUE(dup_oat.get() != NULL); - const uintptr_t requested_image_base = ART_BASE_ADDRESS; { - ImageWriter writer(*compiler_driver_.get()); - bool success_image = writer.Write(image_file.GetFilename(), requested_image_base, - dup_oat->GetPath(), dup_oat->GetPath()); + bool 
success_image = + writer.Write(image_file.GetFilename(), dup_oat->GetPath(), dup_oat->GetPath()); ASSERT_TRUE(success_image); bool success_fixup = ElfFixup::Fixup(dup_oat.get(), writer.GetOatDataBegin()); ASSERT_TRUE(success_fixup); diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index c08d3bdf8b..1c8b8d56bf 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -29,7 +29,6 @@ #include "driver/compiler_driver.h" #include "elf_file.h" #include "elf_utils.h" -#include "elf_patcher.h" #include "elf_writer.h" #include "gc/accounting/card_table-inl.h" #include "gc/accounting/heap_bitmap.h" @@ -68,15 +67,38 @@ using ::art::mirror::String; namespace art { +bool ImageWriter::PrepareImageAddressSpace() { + { + Thread::Current()->TransitionFromSuspendedToRunnable(); + PruneNonImageClasses(); // Remove junk + ComputeLazyFieldsForImageClasses(); // Add useful information + ComputeEagerResolvedStrings(); + Thread::Current()->TransitionFromRunnableToSuspended(kNative); + } + gc::Heap* heap = Runtime::Current()->GetHeap(); + heap->CollectGarbage(false); // Remove garbage. + + if (!AllocMemory()) { + return false; + } + + if (kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); + CheckNonImageClassesRemoved(); + } + + Thread::Current()->TransitionFromSuspendedToRunnable(); + CalculateNewObjectOffsets(); + Thread::Current()->TransitionFromRunnableToSuspended(kNative); + + return true; +} + bool ImageWriter::Write(const std::string& image_filename, - uintptr_t image_begin, const std::string& oat_filename, const std::string& oat_location) { CHECK(!image_filename.empty()); - CHECK_NE(image_begin, 0U); - image_begin_ = reinterpret_cast<byte*>(image_begin); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str())); @@ -115,35 +137,18 @@ bool ImageWriter::Write(const std::string& image_filename, oat_file_->GetOatHeader().GetQuickResolutionTrampolineOffset(); quick_to_interpreter_bridge_offset_ = oat_file_->GetOatHeader().GetQuickToInterpreterBridgeOffset(); - { - Thread::Current()->TransitionFromSuspendedToRunnable(); - PruneNonImageClasses(); // Remove junk - ComputeLazyFieldsForImageClasses(); // Add useful information - ComputeEagerResolvedStrings(); - Thread::Current()->TransitionFromRunnableToSuspended(kNative); - } - gc::Heap* heap = Runtime::Current()->GetHeap(); - heap->CollectGarbage(false); // Remove garbage. 
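The .oat_patches change in elf_writer_quick.cc above no longer reserves empty space; it copies the already-collected absolute patch locations straight out of the uintptr_t vector by reinterpreting its storage as bytes. A standalone sketch of that serialization step, with illustrative names:

#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

// Copy the raw object representation of a uintptr_t vector into a byte buffer,
// as done when filling the .oat_patches section buffer from patch locations.
std::vector<uint8_t> SerializeLocations(const std::vector<uintptr_t>& locations) {
  std::vector<uint8_t> buffer;
  if (!locations.empty()) {
    const uint8_t* begin = reinterpret_cast<const uint8_t*>(&locations[0]);
    const uint8_t* end = begin + locations.size() * sizeof(locations[0]);
    buffer.assign(begin, end);
  }
  return buffer;
}

int main() {
  std::vector<uintptr_t> locations = {0x100, 0x2f8, 0x344};
  std::vector<uint8_t> bytes = SerializeLocations(locations);
  assert(bytes.size() == locations.size() * sizeof(uintptr_t));
  uintptr_t first;
  std::memcpy(&first, bytes.data(), sizeof(first));
  assert(first == 0x100u);  // Round-trips in native byte order.
  return 0;
}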
- if (!AllocMemory()) { - return false; - } - - if (kIsDebugBuild) { - ScopedObjectAccess soa(Thread::Current()); - CheckNonImageClassesRemoved(); - } - - Thread::Current()->TransitionFromSuspendedToRunnable(); size_t oat_loaded_size = 0; size_t oat_data_offset = 0; ElfWriter::GetOatElfInformation(oat_file.get(), oat_loaded_size, oat_data_offset); - CalculateNewObjectOffsets(oat_loaded_size, oat_data_offset); - CopyAndFixupObjects(); - PatchOatCodeAndMethods(oat_file.get()); + Thread::Current()->TransitionFromSuspendedToRunnable(); + CreateHeader(oat_loaded_size, oat_data_offset); + CopyAndFixupObjects(); Thread::Current()->TransitionFromRunnableToSuspended(kNative); + SetOatChecksumFromElfFile(oat_file.get()); + std::unique_ptr<File> image_file(OS::CreateEmptyFile(image_filename.c_str())); ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin()); if (image_file.get() == NULL) { @@ -527,8 +532,7 @@ void ImageWriter::WalkFieldsCallback(mirror::Object* obj, void* arg) { writer->WalkFieldsInOrder(obj); } -void ImageWriter::CalculateNewObjectOffsets(size_t oat_loaded_size, size_t oat_data_offset) { - CHECK_NE(0U, oat_loaded_size); +void ImageWriter::CalculateNewObjectOffsets() { Thread* self = Thread::Current(); StackHandleScope<1> hs(self); Handle<ObjectArray<Object>> image_roots(hs.NewHandle(CreateImageRoots())); @@ -548,7 +552,14 @@ void ImageWriter::CalculateNewObjectOffsets(size_t oat_loaded_size, size_t oat_d heap->VisitObjects(WalkFieldsCallback, this); } - const byte* oat_file_begin = image_begin_ + RoundUp(image_end_, kPageSize); + image_roots_address_ = PointerToLowMemUInt32(GetImageAddress(image_roots.Get())); + + // Note that image_end_ is left at end of used space +} + +void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { + CHECK_NE(0U, oat_loaded_size); + const byte* oat_file_begin = GetOatFileBegin(); const byte* oat_file_end = oat_file_begin + oat_loaded_size; oat_data_begin_ = oat_file_begin + oat_data_offset; const byte* oat_data_end = oat_data_begin_ + oat_file_->Size(); @@ -558,21 +569,19 @@ void ImageWriter::CalculateNewObjectOffsets(size_t oat_loaded_size, size_t oat_d const size_t heap_bytes_per_bitmap_byte = kBitsPerByte * kObjectAlignment; const size_t bitmap_bytes = RoundUp(image_end_, heap_bytes_per_bitmap_byte) / heap_bytes_per_bitmap_byte; - ImageHeader image_header(PointerToLowMemUInt32(image_begin_), - static_cast<uint32_t>(image_end_), - RoundUp(image_end_, kPageSize), - RoundUp(bitmap_bytes, kPageSize), - PointerToLowMemUInt32(GetImageAddress(image_roots.Get())), - oat_file_->GetOatHeader().GetChecksum(), - PointerToLowMemUInt32(oat_file_begin), - PointerToLowMemUInt32(oat_data_begin_), - PointerToLowMemUInt32(oat_data_end), - PointerToLowMemUInt32(oat_file_end)); - memcpy(image_->Begin(), &image_header, sizeof(image_header)); - - // Note that image_end_ is left at end of used space + new (image_->Begin()) ImageHeader(PointerToLowMemUInt32(image_begin_), + static_cast<uint32_t>(image_end_), + RoundUp(image_end_, kPageSize), + RoundUp(bitmap_bytes, kPageSize), + image_roots_address_, + oat_file_->GetOatHeader().GetChecksum(), + PointerToLowMemUInt32(oat_file_begin), + PointerToLowMemUInt32(oat_data_begin_), + PointerToLowMemUInt32(oat_data_end), + PointerToLowMemUInt32(oat_file_end)); } + void ImageWriter::CopyAndFixupObjects() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { ScopedAssertNoThreadSuspension ants(Thread::Current(), "ImageWriter"); @@ -809,19 +818,12 @@ static OatHeader* GetOatHeaderFromElf(ElfFile* elf) 
{ return reinterpret_cast<OatHeader*>(elf->Begin() + data_sec->sh_offset); } -void ImageWriter::PatchOatCodeAndMethods(File* elf_file) { +void ImageWriter::SetOatChecksumFromElfFile(File* elf_file) { std::string error_msg; std::unique_ptr<ElfFile> elf(ElfFile::Open(elf_file, PROT_READ|PROT_WRITE, MAP_SHARED, &error_msg)); if (elf.get() == nullptr) { - LOG(FATAL) << "Unable patch oat file: " << error_msg; - return; - } - if (!ElfPatcher::Patch(&compiler_driver_, elf.get(), oat_file_, - reinterpret_cast<uintptr_t>(oat_data_begin_), - GetImageAddressCallback, reinterpret_cast<void*>(this), - &error_msg)) { - LOG(FATAL) << "unable to patch oat file: " << error_msg; + LOG(FATAL) << "Unable open oat file: " << error_msg; return; } OatHeader* oat_header = GetOatHeaderFromElf(elf.get()); diff --git a/compiler/image_writer.h b/compiler/image_writer.h index e8bcf7f885..bdf06148ec 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -37,17 +37,39 @@ namespace art { // Write a Space built during compilation for use during execution. class ImageWriter { public: - explicit ImageWriter(const CompilerDriver& compiler_driver) - : compiler_driver_(compiler_driver), oat_file_(NULL), image_end_(0), image_begin_(NULL), + ImageWriter(const CompilerDriver& compiler_driver, uintptr_t image_begin) + : compiler_driver_(compiler_driver), image_begin_(reinterpret_cast<byte*>(image_begin)), + image_end_(0), image_roots_address_(0), oat_file_(NULL), oat_data_begin_(NULL), interpreter_to_interpreter_bridge_offset_(0), - interpreter_to_compiled_code_bridge_offset_(0), portable_imt_conflict_trampoline_offset_(0), - portable_resolution_trampoline_offset_(0), quick_generic_jni_trampoline_offset_(0), - quick_imt_conflict_trampoline_offset_(0), quick_resolution_trampoline_offset_(0) {} + interpreter_to_compiled_code_bridge_offset_(0), jni_dlsym_lookup_offset_(0), + portable_imt_conflict_trampoline_offset_(0), portable_resolution_trampoline_offset_(0), + portable_to_interpreter_bridge_offset_(0), quick_generic_jni_trampoline_offset_(0), + quick_imt_conflict_trampoline_offset_(0), quick_resolution_trampoline_offset_(0), + quick_to_interpreter_bridge_offset_(0) { + CHECK_NE(image_begin, 0U); + } ~ImageWriter() {} + bool PrepareImageAddressSpace(); + + bool IsImageAddressSpaceReady() const { + return image_roots_address_ != 0u; + } + + mirror::Object* GetImageAddress(mirror::Object* object) const + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (object == NULL) { + return NULL; + } + return reinterpret_cast<mirror::Object*>(image_begin_ + GetImageOffset(object)); + } + + byte* GetOatFileBegin() const { + return image_begin_ + RoundUp(image_end_, kPageSize); + } + bool Write(const std::string& image_filename, - uintptr_t image_begin, const std::string& oat_filename, const std::string& oat_location) LOCKS_EXCLUDED(Locks::mutator_lock_); @@ -75,14 +97,6 @@ class ImageWriter { return reinterpret_cast<ImageWriter*>(writer)->GetImageAddress(obj); } - mirror::Object* GetImageAddress(mirror::Object* object) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - if (object == NULL) { - return NULL; - } - return reinterpret_cast<mirror::Object*>(image_begin_ + GetImageOffset(object)); - } - mirror::Object* GetLocalAddress(mirror::Object* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { size_t offset = GetImageOffset(object); @@ -131,7 +145,9 @@ class ImageWriter { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Lays out where the image objects will be at runtime. 
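The ImageWriter helpers moved into the header above boil the image layout down to two pieces of arithmetic: an object's runtime address is image_begin_ plus its offset into the image, and the oat file starts at the first page boundary after the used image space. A small standalone illustration with example numbers (the addresses and the power-of-two RoundUp are assumptions of the sketch, not values from the source):

#include <cassert>
#include <cstdint>

constexpr uint32_t kPageSize = 4096;

constexpr uint32_t RoundUp(uint32_t x, uint32_t n) {
  return (x + n - 1) & ~(n - 1);  // n must be a power of two.
}

int main() {
  uint32_t image_begin = 0x70000000u;  // e.g. an ART_BASE_ADDRESS-style base.
  uint32_t image_end = 0x123456u;      // Bytes of image space actually used.

  // GetImageAddress(): runtime address = image base + offset within the image.
  uint32_t object_offset = 0x1000u;
  assert(image_begin + object_offset == 0x70001000u);

  // GetOatFileBegin(): oat data follows the image, page aligned.
  assert(RoundUp(image_end, kPageSize) == 0x124000u);
  assert(image_begin + RoundUp(image_end, kPageSize) == 0x70124000u);
  return 0;
}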
- void CalculateNewObjectOffsets(size_t oat_loaded_size, size_t oat_data_offset) + void CalculateNewObjectOffsets() + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); mirror::ObjectArray<mirror::Object>* CreateImageRoots() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -162,23 +178,25 @@ class ImageWriter { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Patches references in OatFile to expect runtime addresses. - void PatchOatCodeAndMethods(File* elf_file) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void SetOatChecksumFromElfFile(File* elf_file); const CompilerDriver& compiler_driver_; + // Beginning target image address for the output image. + byte* image_begin_; + + // Offset to the free space in image_. + size_t image_end_; + + // The image roots address in the image. + uint32_t image_roots_address_; + // oat file with code for this image OatFile* oat_file_; // Memory mapped for generating the image. std::unique_ptr<MemMap> image_; - // Offset to the free space in image_. - size_t image_end_; - - // Beginning target image address for the output image. - byte* image_begin_; - // Saved hashes (objects are inside of the image so that they don't move). std::vector<std::pair<mirror::Object*, uint32_t>> saved_hashes_; diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 80d7b982b4..0b1f9e2cf9 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -114,7 +114,6 @@ TEST_F(OatTest, WriteRead) { compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings); } - ScopedObjectAccess soa(Thread::Current()); ScratchFile tmp; SafeMap<std::string, std::string> key_value_store; key_value_store.Put(OatHeader::kImageLocationKey, "lue.art"); @@ -123,6 +122,7 @@ TEST_F(OatTest, WriteRead) { 4096U, 0, compiler_driver_.get(), + nullptr, &timings, &key_value_store); bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(), @@ -152,6 +152,7 @@ TEST_F(OatTest, WriteRead) { &dex_file_checksum); ASSERT_TRUE(oat_dex_file != nullptr); CHECK_EQ(dex_file->GetLocationChecksum(), oat_dex_file->GetDexFileLocationChecksum()); + ScopedObjectAccess soa(Thread::Current()); for (size_t i = 0; i < dex_file->NumClassDefs(); i++) { const DexFile::ClassDef& class_def = dex_file->GetClassDef(i); const byte* class_data = dex_file->GetClassData(class_def); diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index e74d6de4eb..dd64368abc 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -27,6 +27,7 @@ #include "dex_file-inl.h" #include "dex/verification_results.h" #include "gc/space/space.h" +#include "image_writer.h" #include "mirror/art_method-inl.h" #include "mirror/array.h" #include "mirror/class_loader.h" @@ -36,10 +37,270 @@ #include "safe_map.h" #include "scoped_thread_state_change.h" #include "handle_scope-inl.h" +#include "utils/arm/assembler_thumb2.h" #include "verifier/method_verifier.h" namespace art { +class OatWriter::RelativeCallPatcher { + public: + virtual ~RelativeCallPatcher() { } + + // Reserve space for relative call thunks if needed, return adjusted offset. + // After all methods have been processed it's call one last time with compiled_method == nullptr. + virtual uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) = 0; + + // Write relative call thunks if needed, return adjusted offset. + virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0; + + // Patch method code. 
The input displacement is relative to the patched location, + // the patcher may need to adjust it if the correct base is different. + virtual void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, + uint32_t target_offset) = 0; + + protected: + RelativeCallPatcher() { } + + private: + DISALLOW_COPY_AND_ASSIGN(RelativeCallPatcher); +}; + +class OatWriter::NoRelativeCallPatcher FINAL : public RelativeCallPatcher { + public: + NoRelativeCallPatcher() { } + + uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) OVERRIDE { + return offset; // No space reserved; no patches expected. + } + + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE { + return offset; // No thunks added; no patches expected. + } + + void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, + uint32_t target_offset) OVERRIDE { + LOG(FATAL) << "Unexpected relative patch."; + } + + private: + DISALLOW_COPY_AND_ASSIGN(NoRelativeCallPatcher); +}; + +class OatWriter::X86RelativeCallPatcher FINAL : public RelativeCallPatcher { + public: + X86RelativeCallPatcher() { } + + uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) OVERRIDE { + return offset; // No space reserved; no limit on relative call distance. + } + + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE { + return offset; // No thunks added; no limit on relative call distance. + } + + void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, + uint32_t target_offset) OVERRIDE { + DCHECK_LE(literal_offset + 4u, code->size()); + // Unsigned arithmetic with its well-defined overflow behavior is just fine here. + uint32_t displacement = target_offset - patch_offset; + displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. + + typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; + reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement; + } + + private: + // PC displacement from patch location; x86 PC for relative calls points to the next + // instruction and the patch location is 4 bytes earlier. + static constexpr int32_t kPcDisplacement = 4; + + DISALLOW_COPY_AND_ASSIGN(X86RelativeCallPatcher); +}; + +class OatWriter::Thumb2RelativeCallPatcher FINAL : public RelativeCallPatcher { + public: + explicit Thumb2RelativeCallPatcher(OatWriter* writer) + : writer_(writer), thunk_code_(CompileThunkCode()), + thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() { + } + + uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) OVERRIDE { + // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it + // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk + // of code. To avoid any alignment discrepancies for the final chunk, we always align the + // offset after reserving of writing any chunk. 
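X86RelativeCallPatcher::Patch above computes a classic rel32: the stored displacement is the target minus the end of the 4-byte immediate, since the x86 PC points at the next instruction. A worked standalone version follows; it uses memcpy where the source uses an unaligned int32_t cast, and the offsets are invented for the example.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

// Patch a 4-byte rel32 at literal_offset so that a call whose immediate sits
// at patch_offset in the oat file reaches target_offset.
void PatchRel32(std::vector<uint8_t>* code, uint32_t literal_offset,
                uint32_t patch_offset, uint32_t target_offset) {
  // Base PC is the end of the 4-byte immediate; unsigned wraparound is fine.
  uint32_t displacement = target_offset - patch_offset - 4u;
  std::memcpy(&(*code)[literal_offset], &displacement, 4u);  // Little-endian on x86.
}

int main() {
  std::vector<uint8_t> code(16, 0);
  // Assume this code chunk starts at file offset 0x1000, so the rel32 that
  // begins at byte 4 of the chunk is at patch offset 0x1004.
  PatchRel32(&code, 4u, 0x1004u, 0x2000u);
  uint32_t stored;
  std::memcpy(&stored, &code[4], 4u);
  assert(stored == 0x2000u - 0x1004u - 4u);  // 0xFF8 bytes forward.
  return 0;
}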
+ if (UNLIKELY(compiled_method == nullptr)) { + uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kThumb2); + bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset); + if (needs_thunk) { + thunk_locations_.push_back(aligned_offset); + offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), kThumb2); + } + return offset; + } + DCHECK(compiled_method->GetQuickCode() != nullptr); + uint32_t quick_code_size = compiled_method->GetQuickCode()->size(); + uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader); + uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size); + if (!unprocessed_patches_.empty() && + next_aligned_offset - unprocessed_patches_.front().second > kMaxPositiveDisplacement) { + bool needs_thunk = ReserveSpaceProcessPatches(next_aligned_offset); + if (needs_thunk) { + // A single thunk will cover all pending patches. + unprocessed_patches_.clear(); + uint32_t thunk_location = compiled_method->AlignCode(offset); + thunk_locations_.push_back(thunk_location); + offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), kThumb2); + } + } + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchCallRelative) { + unprocessed_patches_.emplace_back(patch.TargetMethod(), + quick_code_offset + patch.LiteralOffset()); + } + } + return offset; + } + + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE { + if (current_thunk_to_write_ == thunk_locations_.size()) { + return offset; + } + uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kThumb2); + if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) { + ++current_thunk_to_write_; + uint32_t aligned_code_delta = aligned_offset - offset; + if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) { + return 0u; + } + if (!out->WriteFully(thunk_code_.data(), thunk_code_.size())) { + return 0u; + } + writer_->size_relative_call_thunks_ += thunk_code_.size(); + uint32_t thunk_end_offset = aligned_offset + thunk_code_.size(); + // Align after writing chunk, see the ReserveSpace() above. + offset = CompiledMethod::AlignCode(thunk_end_offset, kThumb2); + aligned_code_delta = offset - thunk_end_offset; + if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) { + return 0u; + } + } + return offset; + } + + void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, + uint32_t target_offset) OVERRIDE { + DCHECK_LE(literal_offset + 4u, code->size()); + DCHECK_EQ(literal_offset & 1u, 0u); + DCHECK_EQ(patch_offset & 1u, 0u); + DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit. + // Unsigned arithmetic with its well-defined overflow behavior is just fine here. + uint32_t displacement = target_offset - 1u - patch_offset; + // NOTE: With unsigned arithmetic we do mean to use && rather than || below. + if (displacement > kMaxPositiveDisplacement && displacement < -kMaxNegativeDisplacement) { + // Unwritten thunks have higher offsets, check if it's within range. + DCHECK(current_thunk_to_write_ == thunk_locations_.size() || + thunk_locations_[current_thunk_to_write_] > patch_offset); + if (current_thunk_to_write_ != thunk_locations_.size() && + thunk_locations_[current_thunk_to_write_] - patch_offset < kMaxPositiveDisplacement) { + displacement = thunk_locations_[current_thunk_to_write_] - patch_offset; + } else { + // We must have a previous thunk then. 
+ DCHECK_NE(current_thunk_to_write_, 0u); + DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset); + displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset; + DCHECK(displacement >= -kMaxNegativeDisplacement); + } + } + displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. + DCHECK_EQ(displacement & 1u, 0u); + DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed. + uint32_t signbit = (displacement >> 31) & 0x1; + uint32_t i1 = (displacement >> 23) & 0x1; + uint32_t i2 = (displacement >> 22) & 0x1; + uint32_t imm10 = (displacement >> 12) & 0x03ff; + uint32_t imm11 = (displacement >> 1) & 0x07ff; + uint32_t j1 = i1 ^ (signbit ^ 1); + uint32_t j2 = i2 ^ (signbit ^ 1); + uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11; + value |= 0xf000d000; // BL + + uint8_t* addr = &(*code)[literal_offset]; + // Check that we're just overwriting an existing BL. + DCHECK_EQ(addr[1] & 0xf8, 0xf0); + DCHECK_EQ(addr[3] & 0xd0, 0xd0); + // Write the new BL. + addr[0] = (value >> 16) & 0xff; + addr[1] = (value >> 24) & 0xff; + addr[2] = (value >> 0) & 0xff; + addr[3] = (value >> 8) & 0xff; + } + + private: + bool ReserveSpaceProcessPatches(uint32_t next_aligned_offset) { + // Process as many patches as possible, stop only on unresolved targets or calls too far back. + while (!unprocessed_patches_.empty()) { + uint32_t patch_offset = unprocessed_patches_.front().second; + auto it = writer_->method_offset_map_.find(unprocessed_patches_.front().first); + if (it == writer_->method_offset_map_.end()) { + // If still unresolved, check if we have a thunk within range. + DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset); + if (thunk_locations_.empty() || + patch_offset - thunk_locations_.back() > kMaxNegativeDisplacement) { + return next_aligned_offset - patch_offset > kMaxPositiveDisplacement; + } + } else if (it->second >= patch_offset) { + DCHECK_LE(it->second - patch_offset, kMaxPositiveDisplacement); + } else { + // When calling back, check if we have a thunk that's closer than the actual target. + uint32_t target_offset = (thunk_locations_.empty() || it->second > thunk_locations_.back()) + ? it->second + : thunk_locations_.back(); + DCHECK_GT(patch_offset, target_offset); + if (patch_offset - target_offset > kMaxNegativeDisplacement) { + return true; + } + } + unprocessed_patches_.pop_front(); + } + return false; + } + + static std::vector<uint8_t> CompileThunkCode() { + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. + arm::Thumb2Assembler assembler; + assembler.LoadFromOffset( + arm::kLoadWord, arm::PC, arm::R0, + mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()); + assembler.bkpt(0); + std::vector<uint8_t> thunk_code(assembler.CodeSize()); + MemoryRegion code(thunk_code.data(), thunk_code.size()); + assembler.FinalizeInstructions(code); + return thunk_code; + } + + // PC displacement from patch location; Thumb2 PC is always at instruction address + 4. + static constexpr int32_t kPcDisplacement = 4; + + // Maximum positive and negative displacement measured from the patch location. + // (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from + // the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.) 
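The Thumb2 Patch method above re-encodes a BL whose 25-bit signed, halfword-aligned displacement is split across two 16-bit halfwords (fields S, imm10, J1, J2, imm11, with J1/J2 derived from I1/I2 and the sign bit). Below is a standalone encoder mirroring the same bit manipulation and the little-endian halfword byte order used when writing back; the function name is illustrative.

#include <cassert>
#include <cstdint>

// Encode a Thumb2 BL for the given byte displacement (even, within the signed
// 25-bit range) and store it as two little-endian 16-bit halfwords.
void EncodeThumb2Bl(uint32_t displacement, uint8_t addr[4]) {
  uint32_t signbit = (displacement >> 31) & 0x1;
  uint32_t i1 = (displacement >> 23) & 0x1;
  uint32_t i2 = (displacement >> 22) & 0x1;
  uint32_t imm10 = (displacement >> 12) & 0x03ff;
  uint32_t imm11 = (displacement >> 1) & 0x07ff;
  uint32_t j1 = i1 ^ (signbit ^ 1);
  uint32_t j2 = i2 ^ (signbit ^ 1);
  uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11;
  value |= 0xf000d000;             // BL opcode bits.
  addr[0] = (value >> 16) & 0xff;  // First halfword, low byte.
  addr[1] = (value >> 24) & 0xff;  // First halfword, high byte.
  addr[2] = (value >> 0) & 0xff;   // Second halfword, low byte.
  addr[3] = (value >> 8) & 0xff;   // Second halfword, high byte.
}

int main() {
  uint8_t insn[4];
  // A BL with encoded displacement zero branches to the address four bytes
  // past the instruction (the Thumb2 PC).
  EncodeThumb2Bl(0u, insn);
  assert(insn[0] == 0x00 && insn[1] == 0xf0);  // 0xF000: 11110, S=0, imm10=0.
  assert(insn[2] == 0x00 && insn[3] == 0xf8);  // 0xF800: BL, J1=J2=1, imm11=0.
  return 0;
}

With the same bias applied, the range constants defined just below work out to (1 << 24) - 2 + 4 = 16,777,218 bytes forward and (1 << 24) - 4 = 16,777,212 bytes back, measured from the patch location rather than from the Thumb2 PC.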
+ static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; + static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement; + + OatWriter* const writer_; + const std::vector<uint8_t> thunk_code_; + std::vector<uint32_t> thunk_locations_; + size_t current_thunk_to_write_; + + // ReserveSpace() tracks unprocessed patches. + typedef std::pair<MethodReference, uint32_t> UnprocessedPatch; + std::deque<UnprocessedPatch> unprocessed_patches_; + + DISALLOW_COPY_AND_ASSIGN(Thumb2RelativeCallPatcher); +}; + #define DCHECK_OFFSET() \ DCHECK_EQ(static_cast<off_t>(file_offset + relative_offset), out->Seek(0, kSeekCurrent)) \ << "file_offset=" << file_offset << " relative_offset=" << relative_offset @@ -53,10 +314,14 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, uintptr_t image_file_location_oat_begin, int32_t image_patch_delta, const CompilerDriver* compiler, + ImageWriter* image_writer, TimingLogger* timings, SafeMap<std::string, std::string>* key_value_store) : compiler_driver_(compiler), + image_writer_(image_writer), dex_files_(&dex_files), + size_(0u), + oat_data_offset_(0u), image_file_location_oat_checksum_(image_file_location_oat_checksum), image_file_location_oat_begin_(image_file_location_oat_begin), image_patch_delta_(image_patch_delta), @@ -81,6 +346,7 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, size_method_header_(0), size_code_(0), size_code_alignment_(0), + size_relative_call_thunks_(0), size_mapping_table_(0), size_vmap_table_(0), size_gc_map_(0), @@ -92,9 +358,27 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, size_oat_class_type_(0), size_oat_class_status_(0), size_oat_class_method_bitmaps_(0), - size_oat_class_method_offsets_(0) { + size_oat_class_method_offsets_(0), + method_offset_map_() { CHECK(key_value_store != nullptr); + switch (compiler_driver_->GetInstructionSet()) { + case kX86: + case kX86_64: + relative_call_patcher_.reset(new X86RelativeCallPatcher); + break; + case kArm: + // Fall through: we generate Thumb2 code for "arm". + case kThumb2: + relative_call_patcher_.reset(new Thumb2RelativeCallPatcher(this)); + break; + case kArm64: + // TODO: Implement relative calls for arm64. 
+ default: + relative_call_patcher_.reset(new NoRelativeCallPatcher); + break; + } + size_t offset; { TimingLogger::ScopedTiming split("InitOatHeader", timings); @@ -127,6 +411,7 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, size_ = offset; CHECK_EQ(dex_files_->size(), oat_dex_files_.size()); + CHECK_EQ(compiler->IsImage(), image_writer_ != nullptr); CHECK_EQ(compiler->IsImage(), key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end()); CHECK_ALIGNED(image_patch_delta_, kPageSize); @@ -316,6 +601,7 @@ class OatWriter::InitOatClassesMethodVisitor : public DexMethodVisitor { OatClass* oat_class = new OatClass(offset_, compiled_methods_, num_non_null_compiled_methods_, status); writer_->oat_classes_.push_back(oat_class); + oat_class->UpdateChecksum(writer_->oat_header_); offset_ += oat_class->SizeOf(); return DexMethodVisitor::EndClass(); } @@ -329,6 +615,16 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { public: InitCodeMethodVisitor(OatWriter* writer, size_t offset) : OatDexMethodVisitor(writer, offset) { + writer_->absolute_patch_locations_.reserve( + writer_->compiler_driver_->GetNonRelativeLinkerPatchCount()); + } + + bool EndClass() { + OatDexMethodVisitor::EndClass(); + if (oat_class_index_ == writer_->oat_classes_.size()) { + offset_ = writer_->relative_call_patcher_->ReserveSpace(offset_, nullptr); + } + return true; } bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) @@ -350,6 +646,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { oat_method_offsets_offset + OFFSETOF_MEMBER(OatMethodOffsets, code_offset_)); } else { CHECK(quick_code != nullptr); + offset_ = writer_->relative_call_patcher_->ReserveSpace(offset_, compiled_method); offset_ = compiled_method->AlignCode(offset_); DCHECK_ALIGNED_PARAM(offset_, GetInstructionSetAlignment(compiled_method->GetInstructionSet())); @@ -369,6 +666,18 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { dedupe_map_.PutBefore(lb, compiled_method, quick_code_offset); } + MethodReference method_ref(dex_file_, it.GetMemberIndex()); + auto method_lb = writer_->method_offset_map_.lower_bound(method_ref); + if (method_lb != writer_->method_offset_map_.end() && + !writer_->method_offset_map_.key_comp()(method_ref, method_lb->first)) { + // TODO: Should this be a hard failure? + LOG(WARNING) << "Multiple definitions of " + << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file) + << ((method_lb->second != quick_code_offset) ? "; OFFSET MISMATCH" : ""); + } else { + writer_->method_offset_map_.PutBefore(method_lb, method_ref, quick_code_offset); + } + // Update quick method header. DCHECK_LT(method_offsets_index_, oat_class->method_headers_.size()); OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_]; @@ -392,12 +701,19 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { frame_size_in_bytes, core_spill_mask, fp_spill_mask, code_size); - // Update checksum if this wasn't a duplicate. if (!deduped) { - writer_->oat_header_->UpdateChecksum(method_header, sizeof(*method_header)); + // Update offsets. (Checksum is updated when writing.) offset_ += sizeof(*method_header); // Method header is prepended before code. - writer_->oat_header_->UpdateChecksum(&(*quick_code)[0], code_size); offset_ += code_size; + // Record absolute patch locations. 
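The new method_offset_map_ bookkeeping above uses the standard lower_bound-then-hinted-insert idiom, so the duplicate-definition check and the insertion share a single map traversal (SafeMap's PutBefore is the hinted insert). A generic standalone version of the same pattern with std::map, using invented keys:

#include <cassert>
#include <map>
#include <string>

// Look up and, if absent, insert with one tree walk: lower_bound answers
// "is it there?" and doubles as the insertion hint.
bool RecordOffset(std::map<std::string, uint32_t>* offsets,
                  const std::string& key, uint32_t offset) {
  auto lb = offsets->lower_bound(key);
  if (lb != offsets->end() && !offsets->key_comp()(key, lb->first)) {
    return false;  // Already present: a duplicate definition.
  }
  offsets->insert(lb, std::make_pair(key, offset));  // Hinted insert.
  return true;
}

int main() {
  std::map<std::string, uint32_t> offsets;
  assert(RecordOffset(&offsets, "LFoo;->bar()V", 0x1000u));
  assert(!RecordOffset(&offsets, "LFoo;->bar()V", 0x2000u));  // Duplicate detected.
  assert(offsets["LFoo;->bar()V"] == 0x1000u);                // First offset kept.
  return 0;
}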
+ if (!compiled_method->GetPatches().empty()) { + uintptr_t base_loc = offset_ - code_size - writer_->oat_header_->GetExecutableOffset(); + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() != kLinkerPatchCallRelative) { + writer_->absolute_patch_locations_.push_back(base_loc + patch.LiteralOffset()); + } + } + } } if (writer_->compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) { @@ -548,13 +864,51 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { public: WriteCodeMethodVisitor(OatWriter* writer, OutputStream* out, const size_t file_offset, - size_t relative_offset) + size_t relative_offset) SHARED_LOCK_FUNCTION(Locks::mutator_lock_) : OatDexMethodVisitor(writer, relative_offset), out_(out), - file_offset_(file_offset) { + file_offset_(file_offset), + self_(Thread::Current()), + old_no_thread_suspension_cause_(self_->StartAssertNoThreadSuspension("OatWriter patching")), + class_linker_(Runtime::Current()->GetClassLinker()), + dex_cache_(nullptr) { + if (writer_->image_writer_ != nullptr) { + // If we're creating the image, the address space must be ready so that we can apply patches. + CHECK(writer_->image_writer_->IsImageAddressSpaceReady()); + patched_code_.reserve(16 * KB); + } + self_->TransitionFromSuspendedToRunnable(); } - bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) { + ~WriteCodeMethodVisitor() UNLOCK_FUNCTION(Locks::mutator_lock_) { + self_->EndAssertNoThreadSuspension(old_no_thread_suspension_cause_); + self_->TransitionFromRunnableToSuspended(kNative); + } + + bool StartClass(const DexFile* dex_file, size_t class_def_index) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + OatDexMethodVisitor::StartClass(dex_file, class_def_index); + if (dex_cache_ == nullptr || dex_cache_->GetDexFile() != dex_file) { + dex_cache_ = class_linker_->FindDexCache(*dex_file); + } + return true; + } + + bool EndClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + bool result = OatDexMethodVisitor::EndClass(); + if (oat_class_index_ == writer_->oat_classes_.size()) { + DCHECK(result); // OatDexMethodVisitor::EndClass() never fails. 
+ offset_ = writer_->relative_call_patcher_->WriteThunks(out_, offset_); + if (UNLIKELY(offset_ == 0u)) { + PLOG(ERROR) << "Failed to write final relative call thunks"; + result = false; + } + } + return result; + } + + bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { OatClass* oat_class = writer_->oat_classes_[oat_class_index_]; const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index); @@ -565,18 +919,18 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { const std::vector<uint8_t>* quick_code = compiled_method->GetQuickCode(); if (quick_code != nullptr) { CHECK(compiled_method->GetPortableCode() == nullptr); + offset_ = writer_->relative_call_patcher_->WriteThunks(out, offset_); + if (offset_ == 0u) { + ReportWriteFailure("relative call thunk", it); + return false; + } uint32_t aligned_offset = compiled_method->AlignCode(offset_); uint32_t aligned_code_delta = aligned_offset - offset_; if (aligned_code_delta != 0) { - static const uint8_t kPadding[] = { - 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u - }; - DCHECK_LE(aligned_code_delta, sizeof(kPadding)); - if (UNLIKELY(!out->WriteFully(kPadding, aligned_code_delta))) { + if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) { ReportWriteFailure("code alignment padding", it); return false; } - writer_->size_code_alignment_ += aligned_code_delta; offset_ += aligned_code_delta; DCHECK_OFFSET_(); } @@ -591,7 +945,9 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta()) << PrettyMethod(it.GetMemberIndex(), *dex_file_); if (method_offsets.code_offset_ >= offset_) { - const OatQuickMethodHeader& method_header = oat_class->method_headers_[method_offsets_index_]; + const OatQuickMethodHeader& method_header = + oat_class->method_headers_[method_offsets_index_]; + writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header)); if (!out->WriteFully(&method_header, sizeof(method_header))) { ReportWriteFailure("method header", it); return false; @@ -599,6 +955,31 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { writer_->size_method_header_ += sizeof(method_header); offset_ += sizeof(method_header); DCHECK_OFFSET_(); + + if (!compiled_method->GetPatches().empty()) { + patched_code_ = *quick_code; + quick_code = &patched_code_; + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchCallRelative) { + // NOTE: Relative calls across oat files are not supported. 
+ uint32_t target_offset = GetTargetOffset(patch); + uint32_t literal_offset = patch.LiteralOffset(); + writer_->relative_call_patcher_->Patch(&patched_code_, literal_offset, + offset_ + literal_offset, target_offset); + } else if (patch.Type() == kLinkerPatchCall) { + uint32_t target_offset = GetTargetOffset(patch); + PatchCodeAddress(&patched_code_, patch.LiteralOffset(), target_offset); + } else if (patch.Type() == kLinkerPatchMethod) { + mirror::ArtMethod* method = GetTargetMethod(patch); + PatchObjectAddress(&patched_code_, patch.LiteralOffset(), method); + } else if (patch.Type() == kLinkerPatchType) { + mirror::Class* type = GetTargetType(patch); + PatchObjectAddress(&patched_code_, patch.LiteralOffset(), type); + } + } + } + + writer_->oat_header_->UpdateChecksum(&(*quick_code)[0], code_size); if (!out->WriteFully(&(*quick_code)[0], code_size)) { ReportWriteFailure("method code", it); return false; @@ -617,11 +998,81 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { private: OutputStream* const out_; size_t const file_offset_; + Thread* const self_; + const char* const old_no_thread_suspension_cause_; // TODO: Use ScopedAssertNoThreadSuspension. + ClassLinker* const class_linker_; + mirror::DexCache* dex_cache_; + std::vector<uint8_t> patched_code_; void ReportWriteFailure(const char* what, const ClassDataItemIterator& it) { PLOG(ERROR) << "Failed to write " << what << " for " << PrettyMethod(it.GetMemberIndex(), *dex_file_) << " to " << out_->GetLocation(); } + + mirror::ArtMethod* GetTargetMethod(const LinkerPatch& patch) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + MethodReference ref = patch.TargetMethod(); + mirror::DexCache* dex_cache = + (dex_file_ == ref.dex_file) ? dex_cache_ : class_linker_->FindDexCache(*ref.dex_file); + mirror::ArtMethod* method = dex_cache->GetResolvedMethod(ref.dex_method_index); + CHECK(method != nullptr); + return method; + } + + uint32_t GetTargetOffset(const LinkerPatch& patch) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + auto target_it = writer_->method_offset_map_.find(patch.TargetMethod()); + uint32_t target_offset = + (target_it != writer_->method_offset_map_.end()) ? target_it->second : 0u; + // If there's no compiled code, point to the correct trampoline. + if (UNLIKELY(target_offset == 0)) { + mirror::ArtMethod* target = GetTargetMethod(patch); + DCHECK(target != nullptr); + DCHECK_EQ(target->GetQuickOatCodeOffset(), 0u); + target_offset = target->IsNative() + ? writer_->oat_header_->GetQuickGenericJniTrampolineOffset() + : writer_->oat_header_->GetQuickToInterpreterBridgeOffset(); + } + return target_offset; + } + + mirror::Class* GetTargetType(const LinkerPatch& patch) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + mirror::DexCache* dex_cache = (dex_file_ == patch.TargetTypeDexFile()) + ? dex_cache_ : class_linker_->FindDexCache(*patch.TargetTypeDexFile()); + mirror::Class* type = dex_cache->GetResolvedType(patch.TargetTypeIndex()); + CHECK(type != nullptr); + return type; + } + + void PatchObjectAddress(std::vector<uint8_t>* code, uint32_t offset, mirror::Object* object) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + // NOTE: Direct method pointers across oat files don't use linker patches. However, direct + // type pointers across oat files do. (TODO: Investigate why.) 
+ if (writer_->image_writer_ != nullptr) { + object = writer_->image_writer_->GetImageAddress(object); + } + uint32_t address = PointerToLowMemUInt32(object); + DCHECK_LE(offset + 4, code->size()); + uint8_t* data = &(*code)[offset]; + data[0] = address & 0xffu; + data[1] = (address >> 8) & 0xffu; + data[2] = (address >> 16) & 0xffu; + data[3] = (address >> 24) & 0xffu; + } + + void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + // NOTE: Direct calls across oat files don't use linker patches. + DCHECK(writer_->image_writer_ != nullptr); + uint32_t address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() + + writer_->oat_data_offset_ + target_offset); + DCHECK_LE(offset + 4, code->size()); + uint8_t* data = &(*code)[offset]; + data[0] = address & 0xffu; + data[1] = (address >> 8) & 0xffu; + data[2] = (address >> 16) & 0xffu; + data[3] = (address >> 24) & 0xffu; + } }; template <typename DataAccess> @@ -863,11 +1314,17 @@ size_t OatWriter::InitOatCodeDexFiles(size_t offset) { } bool OatWriter::Write(OutputStream* out) { - const size_t file_offset = out->Seek(0, kSeekCurrent); + const off_t raw_file_offset = out->Seek(0, kSeekCurrent); + if (raw_file_offset == (off_t) -1) { + LOG(ERROR) << "Failed to get file offset in " << out->GetLocation(); + return false; + } + const size_t file_offset = static_cast<size_t>(raw_file_offset); + // Reserve space for header. It will be written last - after updating the checksum. size_t header_size = oat_header_->GetHeaderSize(); - if (!out->WriteFully(oat_header_, header_size)) { - PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation(); + if (out->Seek(header_size, kSeekCurrent) == (off_t) -1) { + PLOG(ERROR) << "Failed to reserve space for oat header in " << out->GetLocation(); return false; } size_oat_header_ += sizeof(OatHeader); @@ -878,7 +1335,12 @@ bool OatWriter::Write(OutputStream* out) { return false; } - size_t relative_offset = out->Seek(0, kSeekCurrent) - file_offset; + off_t tables_end_offset = out->Seek(0, kSeekCurrent); + if (tables_end_offset == (off_t) -1) { + LOG(ERROR) << "Failed to seek to oat code position in " << out->GetLocation(); + return false; + } + size_t relative_offset = static_cast<size_t>(tables_end_offset) - file_offset; relative_offset = WriteMaps(out, file_offset, relative_offset); if (relative_offset == 0) { LOG(ERROR) << "Failed to write oat code to " << out->GetLocation(); @@ -897,6 +1359,12 @@ bool OatWriter::Write(OutputStream* out) { return false; } + const off_t oat_end_file_offset = out->Seek(0, kSeekCurrent); + if (oat_end_file_offset == (off_t) -1) { + LOG(ERROR) << "Failed to get oat end file offset in " << out->GetLocation(); + return false; + } + if (kIsDebugBuild) { uint32_t size_total = 0; #define DO_STAT(x) \ @@ -922,6 +1390,7 @@ bool OatWriter::Write(OutputStream* out) { DO_STAT(size_method_header_); DO_STAT(size_code_); DO_STAT(size_code_alignment_); + DO_STAT(size_relative_call_thunks_); DO_STAT(size_mapping_table_); DO_STAT(size_vmap_table_); DO_STAT(size_gc_map_); @@ -937,13 +1406,29 @@ bool OatWriter::Write(OutputStream* out) { #undef DO_STAT VLOG(compiler) << "size_total=" << PrettySize(size_total) << " (" << size_total << "B)"; \ - CHECK_EQ(file_offset + size_total, static_cast<uint32_t>(out->Seek(0, kSeekCurrent))); + CHECK_EQ(file_offset + size_total, static_cast<size_t>(oat_end_file_offset)); CHECK_EQ(size_, size_total); } - CHECK_EQ(file_offset + size_, 
static_cast<uint32_t>(out->Seek(0, kSeekCurrent))); + CHECK_EQ(file_offset + size_, static_cast<size_t>(oat_end_file_offset)); CHECK_EQ(size_, relative_offset); + // Write the header now that the checksum is final. + if (out->Seek(file_offset, kSeekSet) == (off_t) -1) { + PLOG(ERROR) << "Failed to seek to oat header position in " << out->GetLocation(); + return false; + } + DCHECK_EQ(raw_file_offset, out->Seek(0, kSeekCurrent)); + if (!out->WriteFully(oat_header_, header_size)) { + PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation(); + return false; + } + if (out->Seek(oat_end_file_offset, kSeekSet) == (off_t) -1) { + PLOG(ERROR) << "Failed to seek to end after writing oat header to " << out->GetLocation(); + return false; + } + DCHECK_EQ(oat_end_file_offset, out->Seek(0, kSeekCurrent)); + return true; } @@ -1070,6 +1555,18 @@ size_t OatWriter::WriteCodeDexFiles(OutputStream* out, return relative_offset; } +bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) { + static const uint8_t kPadding[] = { + 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u + }; + DCHECK_LE(aligned_code_delta, sizeof(kPadding)); + if (UNLIKELY(!out->WriteFully(kPadding, aligned_code_delta))) { + return false; + } + size_code_alignment_ += aligned_code_delta; + return true; +} + OatWriter::OatDexFile::OatDexFile(size_t offset, const DexFile& dex_file) { offset_ = offset; const std::string& location(dex_file.GetLocation()); diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index 11f8bffd11..5545ba817b 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -23,6 +23,7 @@ #include "driver/compiler_driver.h" #include "mem_map.h" +#include "method_reference.h" #include "oat.h" #include "mirror/class.h" #include "safe_map.h" @@ -31,6 +32,7 @@ namespace art { class BitVector; class CompiledMethod; +class ImageWriter; class OutputStream; // OatHeader variable length with count of D OatDexFiles @@ -82,6 +84,7 @@ class OatWriter { uintptr_t image_file_location_oat_begin, int32_t image_patch_delta, const CompilerDriver* compiler, + ImageWriter* image_writer, TimingLogger* timings, SafeMap<std::string, std::string>* key_value_store); @@ -93,6 +96,14 @@ class OatWriter { return size_; } + const std::vector<uintptr_t>& GetAbsolutePatchLocations() const { + return absolute_patch_locations_; + } + + void SetOatDataOffset(size_t oat_data_offset) { + oat_data_offset_ = oat_data_offset; + } + bool Write(OutputStream* out); ~OatWriter(); @@ -160,6 +171,8 @@ class OatWriter { size_t WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset); size_t WriteCodeDexFiles(OutputStream* out, const size_t file_offset, size_t relative_offset); + bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta); + class OatDexFile { public: explicit OatDexFile(size_t offset, const DexFile& dex_file); @@ -248,6 +261,7 @@ class OatWriter { std::vector<DebugInfo> method_info_; const CompilerDriver* const compiler_driver_; + ImageWriter* const image_writer_; // note OatFile does not take ownership of the DexFiles const std::vector<const DexFile*>* dex_files_; @@ -255,6 +269,9 @@ class OatWriter { // Size required for Oat data structures. size_t size_; + // Offset of the oat data from the start of the mmapped region of the elf file. + size_t oat_data_offset_; + // dependencies on the image. 
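The OatWriter::Write() changes above defer the header: space for it is reserved with a seek, the tables and code are written (updating the checksum along the way), and only then does the writer seek back to emit the header and return to the end of the file. A minimal sketch of that reserve-then-backpatch pattern with plain C stdio, as a standalone illustration rather than ART's OutputStream API:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Sketch (hypothetical helper, not ART code): write `body` first, then go back
    // and write `header` once it is final, leaving the file position at the end.
    bool WriteWithDeferredHeader(std::FILE* f,
                                 const std::vector<uint8_t>& header,
                                 const std::vector<uint8_t>& body) {
      long start = std::ftell(f);
      if (start == -1L) return false;
      // Reserve space for the header; it is written last.
      if (std::fseek(f, static_cast<long>(header.size()), SEEK_CUR) != 0) return false;
      if (std::fwrite(body.data(), 1, body.size(), f) != body.size()) return false;
      long end = std::ftell(f);
      if (end == -1L) return false;
      // Seek back, emit the header, then restore the position at the end of the file.
      if (std::fseek(f, start, SEEK_SET) != 0) return false;
      if (std::fwrite(header.data(), 1, header.size(), f) != header.size()) return false;
      return std::fseek(f, end, SEEK_SET) == 0;
    }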
uint32_t image_file_location_oat_checksum_; uintptr_t image_file_location_oat_begin_; @@ -296,6 +313,7 @@ class OatWriter { uint32_t size_method_header_; uint32_t size_code_; uint32_t size_code_alignment_; + uint32_t size_relative_call_thunks_; uint32_t size_mapping_table_; uint32_t size_vmap_table_; uint32_t size_gc_map_; @@ -309,6 +327,18 @@ class OatWriter { uint32_t size_oat_class_method_bitmaps_; uint32_t size_oat_class_method_offsets_; + class RelativeCallPatcher; + class NoRelativeCallPatcher; + class X86RelativeCallPatcher; + class Thumb2RelativeCallPatcher; + + std::unique_ptr<RelativeCallPatcher> relative_call_patcher_; + + // The locations of absolute patches relative to the start of the executable section. + std::vector<uintptr_t> absolute_patch_locations_; + + SafeMap<MethodReference, uint32_t, MethodReferenceComparator> method_offset_map_; + struct CodeOffsetsKeyComparator { bool operator()(const CompiledMethod* lhs, const CompiledMethod* rhs) const { if (lhs->GetQuickCode() != rhs->GetQuickCode()) { @@ -321,6 +351,18 @@ class OatWriter { if (UNLIKELY(&lhs->GetVmapTable() != &rhs->GetVmapTable())) { return &lhs->GetVmapTable() < &rhs->GetVmapTable(); } + const auto& lhs_patches = lhs->GetPatches(); + const auto& rhs_patches = rhs->GetPatches(); + if (UNLIKELY(lhs_patches.size() != rhs_patches.size())) { + return lhs_patches.size() < rhs_patches.size(); + } + auto rit = rhs_patches.begin(); + for (const LinkerPatch& lpatch : lhs_patches) { + if (UNLIKELY(!(lpatch == *rit))) { + return lpatch < *rit; + } + ++rit; + } return false; } }; diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 33b00d2ac9..5015bd06d9 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -183,9 +183,9 @@ HGraph* HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { // Setup the graph with the entry block and exit block. graph_ = new (arena_) HGraph(arena_); - entry_block_ = new (arena_) HBasicBlock(graph_); + entry_block_ = new (arena_) HBasicBlock(graph_, 0); graph_->AddBlock(entry_block_); - exit_block_ = new (arena_) HBasicBlock(graph_); + exit_block_ = new (arena_) HBasicBlock(graph_, kNoDexPc); graph_->SetEntryBlock(entry_block_); graph_->SetExitBlock(exit_block_); @@ -241,7 +241,7 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_ branch_targets_.SetSize(code_end - code_ptr); // Create the first block for the dex instructions, single successor of the entry block. - HBasicBlock* block = new (arena_) HBasicBlock(graph_); + HBasicBlock* block = new (arena_) HBasicBlock(graph_, 0); branch_targets_.Put(0, block); entry_block_->AddSuccessor(block); @@ -254,13 +254,13 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_ int32_t target = instruction.GetTargetOffset() + dex_offset; // Create a block for the target instruction. 
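The CodeOffsetsKeyComparator extension above folds the linker patches into the dedup ordering: shorter patch lists order first, and equally sized lists are compared element by element. With equal sizes that inner loop is a lexicographical comparison, so the same ordering can be written with the standard library; a sketch assuming elements that provide operator< (hypothetical helper, not the ART comparator itself):

    #include <algorithm>
    #include <vector>

    // Sketch of the patch-list ordering used for code deduplication (hypothetical).
    template <typename Patch>
    bool PatchListLess(const std::vector<Patch>& lhs, const std::vector<Patch>& rhs) {
      if (lhs.size() != rhs.size()) {
        return lhs.size() < rhs.size();  // Order by length first, as in the comparator above.
      }
      return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
    }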
if (FindBlockStartingAt(target) == nullptr) { - block = new (arena_) HBasicBlock(graph_); + block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(target, block); } dex_offset += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); if ((code_ptr < code_end) && (FindBlockStartingAt(dex_offset) == nullptr)) { - block = new (arena_) HBasicBlock(graph_); + block = new (arena_) HBasicBlock(graph_, dex_offset); branch_targets_.Put(dex_offset, block); } } else { diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 3231c99a7b..2a9a7b37ab 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -25,6 +25,7 @@ #include "gc_map_builder.h" #include "leb128.h" #include "mapping_table.h" +#include "ssa_liveness_analysis.h" #include "utils/assembler.h" #include "verifier/dex_gc_map.h" #include "vmap_table.h" @@ -518,4 +519,23 @@ void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) { } } +void CodeGenerator::ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const { + LocationSummary* locations = suspend_check->GetLocations(); + HBasicBlock* block = suspend_check->GetBlock(); + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == suspend_check); + DCHECK(block->IsLoopHeader()); + + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* current = it.Current(); + LiveInterval* interval = current->GetLiveInterval(); + // We only need to clear bits of loop phis containing objects and allocated in register. + // Loop phis allocated on stack already have the object in the stack. + if (current->GetType() == Primitive::kPrimNot + && interval->HasRegister() + && interval->HasSpillSlot()) { + locations->ClearStackBit(interval->GetSpillSlot() / kVRegSize); + } + } +} + } // namespace art diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 55f5d8df5f..b58f3b3efc 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -143,6 +143,13 @@ class CodeGenerator : public ArenaObject { is_leaf_ = false; } + // Clears the spill slots taken by loop phis in the `LocationSummary` of the + // suspend check. This is called when the code generator generates code + // for the suspend check at the back edge (instead of where the suspend check + // is, which is the loop entry). At this point, the spill slots for the phis + // have not been written to. 
+ void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const; + protected: CodeGenerator(HGraph* graph, size_t number_of_registers) : frame_size_(kUninitializedFrameSize), diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 206ed13b72..1876cb9ca4 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -93,8 +93,8 @@ class StackOverflowCheckSlowPathARM : public SlowPathCode { class SuspendCheckSlowPathARM : public SlowPathCode { public: - explicit SuspendCheckSlowPathARM(HSuspendCheck* instruction) - : instruction_(instruction) {} + explicit SuspendCheckSlowPathARM(HSuspendCheck* instruction, HBasicBlock* successor) + : instruction_(instruction), successor_(successor) {} virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); @@ -104,13 +104,24 @@ class SuspendCheckSlowPathARM : public SlowPathCode { __ blx(LR); codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); codegen->RestoreLiveRegisters(instruction_->GetLocations()); - __ b(GetReturnLabel()); + if (successor_ == nullptr) { + __ b(GetReturnLabel()); + } else { + __ b(codegen->GetLabelOf(successor_)); + } } - Label* GetReturnLabel() { return &return_label_; } + Label* GetReturnLabel() { + DCHECK(successor_ == nullptr); + return &return_label_; + } private: HSuspendCheck* const instruction_; + // If not null, the block to branch to after the suspend check. + HBasicBlock* const successor_; + + // If `successor_` is null, the label to branch to after the suspend check. Label return_label_; DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM); @@ -118,9 +129,9 @@ class SuspendCheckSlowPathARM : public SlowPathCode { class BoundsCheckSlowPathARM : public SlowPathCode { public: - explicit BoundsCheckSlowPathARM(HBoundsCheck* instruction, - Location index_location, - Location length_location) + BoundsCheckSlowPathARM(HBoundsCheck* instruction, + Location index_location, + Location length_location) : instruction_(instruction), index_location_(index_location), length_location_(length_location) {} @@ -562,9 +573,22 @@ void LocationsBuilderARM::VisitGoto(HGoto* got) { void InstructionCodeGeneratorARM::VisitGoto(HGoto* got) { HBasicBlock* successor = got->GetSuccessor(); - if (GetGraph()->GetExitBlock() == successor) { - codegen_->GenerateFrameExit(); - } else if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { + DCHECK(!successor->IsExitBlock()); + + HBasicBlock* block = got->GetBlock(); + HInstruction* previous = got->GetPrevious(); + + HLoopInformation* info = block->GetLoopInformation(); + if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) { + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); + GenerateSuspendCheck(info->GetSuspendCheck(), successor); + return; + } + + if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { + GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + } + if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { __ b(codegen_->GetLabelOf(successor)); } } @@ -1567,14 +1591,34 @@ void LocationsBuilderARM::VisitSuspendCheck(HSuspendCheck* instruction) { } void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction) { + HBasicBlock* block = instruction->GetBlock(); + if (block->GetLoopInformation() != nullptr) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); + // The back edge will generate the suspend check. 
+ return; + } + if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { + // The goto will generate the suspend check. + return; + } + GenerateSuspendCheck(instruction, nullptr); +} + +void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instruction, + HBasicBlock* successor) { SuspendCheckSlowPathARM* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction); + new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor); codegen_->AddSlowPath(slow_path); __ AddConstant(R4, R4, -1); __ cmp(R4, ShifterOperand(0)); - __ b(slow_path->GetEntryLabel(), LE); - __ Bind(slow_path->GetReturnLabel()); + if (successor == nullptr) { + __ b(slow_path->GetEntryLabel(), LE); + __ Bind(slow_path->GetReturnLabel()); + } else { + __ b(codegen_->GetLabelOf(successor), GT); + __ b(slow_path->GetEntryLabel()); + } } ArmAssembler* ParallelMoveResolverARM::GetAssembler() const { diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 0902fb84ec..8c86b7a237 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -83,7 +83,7 @@ class ParallelMoveResolverARM : public ParallelMoveResolver { class LocationsBuilderARM : public HGraphVisitor { public: - explicit LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen) + LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen) : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name) \ @@ -117,6 +117,11 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void LoadCurrentMethod(Register reg); private: + // Generate code for the given suspend check. If not null, `successor` + // is the block to branch to if the suspend check is not needed, and after + // the suspend call. 
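GenerateSuspendCheck, declared just below, emits one of two branch shapes: a standalone check branches to the slow path only when a suspend is requested and binds a return label right after, while a back-edge check inverts the condition so the common case jumps straight to the successor and otherwise control jumps to the slow path, which branches to the successor when done. A small runnable model of those two shapes (hypothetical pseudo-assembly, loosely following the x86 variant rather than any exact ART output):

    #include <cstdio>

    // Prints the branch shape GenerateSuspendCheck would emit (hypothetical sketch).
    void PrintSuspendCheckShape(bool has_successor) {
      std::printf("  cmp   <thread flags>, 0\n");
      if (!has_successor) {
        std::printf("  jne   slow_path        ; taken only when a suspend is requested\n");
        std::printf("return_label:            ; slow path branches back here\n");
      } else {
        std::printf("  je    successor        ; common case: no suspend, continue the loop\n");
        std::printf("  jmp   slow_path        ; slow path branches to `successor` when done\n");
      }
    }

    int main() {
      std::printf("Standalone check:\n");
      PrintSuspendCheckShape(false);
      std::printf("Back-edge check:\n");
      PrintSuspendCheckShape(true);
      return 0;
    }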
+ void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); + ArmAssembler* const assembler_; CodeGeneratorARM* const codegen_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0db4311f03..ea67dfda32 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -92,9 +92,9 @@ class StackOverflowCheckSlowPathX86 : public SlowPathCode { class BoundsCheckSlowPathX86 : public SlowPathCode { public: - explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction, - Location index_location, - Location length_location) + BoundsCheckSlowPathX86(HBoundsCheck* instruction, + Location index_location, + Location length_location) : instruction_(instruction), index_location_(index_location), length_location_(length_location) {} virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { @@ -117,8 +117,8 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { class SuspendCheckSlowPathX86 : public SlowPathCode { public: - explicit SuspendCheckSlowPathX86(HSuspendCheck* instruction) - : instruction_(instruction) {} + explicit SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor) + : instruction_(instruction), successor_(successor) {} virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); @@ -126,13 +126,21 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend))); codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); codegen->RestoreLiveRegisters(instruction_->GetLocations()); - __ jmp(GetReturnLabel()); + if (successor_ == nullptr) { + __ jmp(GetReturnLabel()); + } else { + __ jmp(codegen->GetLabelOf(successor_)); + } } - Label* GetReturnLabel() { return &return_label_; } + Label* GetReturnLabel() { + DCHECK(successor_ == nullptr); + return &return_label_; + } private: HSuspendCheck* const instruction_; + HBasicBlock* const successor_; Label return_label_; DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86); @@ -517,9 +525,22 @@ void LocationsBuilderX86::VisitGoto(HGoto* got) { void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) { HBasicBlock* successor = got->GetSuccessor(); - if (GetGraph()->GetExitBlock() == successor) { - codegen_->GenerateFrameExit(); - } else if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { + DCHECK(!successor->IsExitBlock()); + + HBasicBlock* block = got->GetBlock(); + HInstruction* previous = got->GetPrevious(); + + HLoopInformation* info = block->GetLoopInformation(); + if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) { + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); + GenerateSuspendCheck(info->GetSuspendCheck(), successor); + return; + } + + if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { + GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + } + if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { __ jmp(codegen_->GetLabelOf(successor)); } } @@ -1558,13 +1579,33 @@ void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) { } void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) { + HBasicBlock* block = instruction->GetBlock(); + if (block->GetLoopInformation() != nullptr) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); + // The back edge will generate the suspend check. 
+ return; + } + if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { + // The goto will generate the suspend check. + return; + } + GenerateSuspendCheck(instruction, nullptr); +} + +void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction, + HBasicBlock* successor) { SuspendCheckSlowPathX86* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction); + new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor); codegen_->AddSlowPath(slow_path); - __ fs()->cmpl(Address::Absolute( + __ fs()->cmpw(Address::Absolute( Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetReturnLabel()); + if (successor == nullptr) { + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetReturnLabel()); + } else { + __ j(kEqual, codegen_->GetLabelOf(successor)); + __ jmp(slow_path->GetEntryLabel()); + } } X86Assembler* ParallelMoveResolverX86::GetAssembler() const { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index ffcaf6076c..23145bfb70 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -119,6 +119,11 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { X86Assembler* GetAssembler() const { return assembler_; } private: + // Generate code for the given suspend check. If not null, `successor` + // is the block to branch to if the suspend check is not needed, and after + // the suspend call. + void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); + X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 56198aff3a..78c7d9d81b 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -98,8 +98,8 @@ class StackOverflowCheckSlowPathX86_64 : public SlowPathCode { class SuspendCheckSlowPathX86_64 : public SlowPathCode { public: - explicit SuspendCheckSlowPathX86_64(HSuspendCheck* instruction) - : instruction_(instruction) {} + explicit SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor) + : instruction_(instruction), successor_(successor) {} virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); @@ -107,13 +107,21 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pTestSuspend), true)); codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); codegen->RestoreLiveRegisters(instruction_->GetLocations()); - __ jmp(GetReturnLabel()); + if (successor_ == nullptr) { + __ jmp(GetReturnLabel()); + } else { + __ jmp(codegen->GetLabelOf(successor_)); + } } - Label* GetReturnLabel() { return &return_label_; } + Label* GetReturnLabel() { + DCHECK(successor_ == nullptr); + return &return_label_; + } private: HSuspendCheck* const instruction_; + HBasicBlock* const successor_; Label return_label_; DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64); @@ -121,9 +129,9 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { class BoundsCheckSlowPathX86_64 : public SlowPathCode { public: - explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction, - Location index_location, - Location length_location) + BoundsCheckSlowPathX86_64(HBoundsCheck* instruction, + Location index_location, + Location 
length_location) : instruction_(instruction), index_location_(index_location), length_location_(length_location) {} @@ -400,9 +408,22 @@ void LocationsBuilderX86_64::VisitGoto(HGoto* got) { void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) { HBasicBlock* successor = got->GetSuccessor(); - if (GetGraph()->GetExitBlock() == successor) { - codegen_->GenerateFrameExit(); - } else if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { + DCHECK(!successor->IsExitBlock()); + + HBasicBlock* block = got->GetBlock(); + HInstruction* previous = got->GetPrevious(); + + HLoopInformation* info = block->GetLoopInformation(); + if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) { + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); + GenerateSuspendCheck(info->GetSuspendCheck(), successor); + return; + } + + if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { + GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + } + if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { __ jmp(codegen_->GetLabelOf(successor)); } } @@ -1403,13 +1424,33 @@ void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { } void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { + HBasicBlock* block = instruction->GetBlock(); + if (block->GetLoopInformation() != nullptr) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); + // The back edge will generate the suspend check. + return; + } + if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { + // The goto will generate the suspend check. + return; + } + GenerateSuspendCheck(instruction, nullptr); +} + +void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction, + HBasicBlock* successor) { SuspendCheckSlowPathX86_64* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction); + new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor); codegen_->AddSlowPath(slow_path); - __ gs()->cmpl(Address::Absolute( + __ gs()->cmpw(Address::Absolute( Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetReturnLabel()); + if (successor == nullptr) { + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetReturnLabel()); + } else { + __ j(kEqual, codegen_->GetLabelOf(successor)); + __ jmp(slow_path->GetEntryLabel()); + } } X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index ea21872100..a299cf6476 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -116,6 +116,11 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { X86_64Assembler* GetAssembler() const { return assembler_; } private: + // Generate code for the given suspend check. If not null, `successor` + // is the block to branch to if the suspend check is not needed, and after + // the suspend call. 
+ void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); + X86_64Assembler* const assembler_; CodeGeneratorX86_64* const codegen_; diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 2f881d1b6d..fe2adc77d0 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -35,7 +35,7 @@ void DeadCodeElimination::Run() { for (i.Advance(); !i.Done(); i.Advance()) { HInstruction* inst = i.Current(); DCHECK(!inst->IsControlFlow()); - if (!inst->HasSideEffects() && !inst->HasUses()) { + if (!inst->HasSideEffects() && !inst->HasUses() && !inst->IsSuspendCheck()) { block->RemoveInstruction(inst); } } diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index e36b1cdcfd..589b44a167 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -141,6 +141,38 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } errors_.Insert(error.str()); } + + // Ensure the inputs of `instruction` are defined in a block of the graph. + for (HInputIterator input_it(instruction); !input_it.Done(); + input_it.Advance()) { + HInstruction* input = input_it.Current(); + const HInstructionList& list = input->IsPhi() + ? input->GetBlock()->GetPhis() + : input->GetBlock()->GetInstructions(); + if (!list.Contains(input)) { + std::stringstream error; + error << "Input " << input->GetId() + << " of instruction " << instruction->GetId() + << " is not defined in a basic block of the control-flow graph."; + errors_.Insert(error.str()); + } + } + + // Ensure the uses of `instruction` are defined in a block of the graph. + for (HUseIterator<HInstruction> use_it(instruction->GetUses()); + !use_it.Done(); use_it.Advance()) { + HInstruction* use = use_it.Current()->GetUser(); + const HInstructionList& list = use->IsPhi() + ? use->GetBlock()->GetPhis() + : use->GetBlock()->GetInstructions(); + if (!list.Contains(use)) { + std::stringstream error; + error << "User " << use->GetId() + << " of instruction " << instruction->GetId() + << " is not defined in a basic block of the control-flow graph."; + errors_.Insert(error.str()); + } + } } void SSAChecker::VisitBasicBlock(HBasicBlock* block) { diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc new file mode 100644 index 0000000000..a0de73da32 --- /dev/null +++ b/compiler/optimizing/instruction_simplifier.cc @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "instruction_simplifier.h" + +namespace art { + +void InstructionSimplifier::Run() { + VisitInsertionOrder(); +} + +void InstructionSimplifier::VisitSuspendCheck(HSuspendCheck* check) { + HBasicBlock* block = check->GetBlock(); + // Currently always keep the suspend check at entry. 
+ if (block->IsEntryBlock()) return; + + // Currently always keep suspend checks at loop entry. + if (block->IsLoopHeader() && block->GetFirstInstruction() == check) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == check); + return; + } + + // Remove the suspend check that was added at build time for the baseline + // compiler. + block->RemoveInstruction(check); +} + +} // namespace art diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h new file mode 100644 index 0000000000..b2f3f521ae --- /dev/null +++ b/compiler/optimizing/instruction_simplifier.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_ +#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_ + +#include "nodes.h" + +namespace art { + +/** + * Implements optimizations specific to each instruction. + */ +class InstructionSimplifier : public HGraphVisitor { + public: + explicit InstructionSimplifier(HGraph* graph) : HGraphVisitor(graph) {} + + void Run(); + + private: + virtual void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE; +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_ diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index a81a30e457..03f8625265 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -146,7 +146,7 @@ TEST(LiveRangesTest, CFG3) { * 22: phi * 24: return * | - * 38: exit + * 28: exit */ const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -194,7 +194,7 @@ TEST(LiveRangesTest, CFG3) { ASSERT_TRUE(range->GetNext() == nullptr); } -TEST(LiveRangesTest, Loop) { +TEST(LiveRangesTest, Loop1) { /* * Test the following snippet: * var a = 0; @@ -233,6 +233,7 @@ TEST(LiveRangesTest, Loop) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); + RemoveSuspendChecks(graph); x86::CodeGeneratorX86 codegen(graph); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -271,4 +272,168 @@ TEST(LiveRangesTest, Loop) { ASSERT_TRUE(range->GetNext() == nullptr); } +TEST(LiveRangesTest, Loop2) { + /* + * Test the following snippet: + * var a = 0; + * while (a == a) { + * a = a + a; + * } + * return a; + * + * Which becomes the following graph (numbered by lifetime position): + * 2: constant0 + * 4: goto + * | + * 8: goto + * | + * 10: phi + * 12: equal + * 14: if +++++ + * | \ + + * | 18: suspend + * | 20: add + * | 22: goto + * | + * 26: return + * | + * 30: exit + * + * We want to make sure the phi at 10 has a lifetime hole after the add at 20. 
+ */ + + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_EQ, 6, + Instruction::ADD_INT, 0, 0, + Instruction::GOTO | 0xFB00, + Instruction::RETURN | 0 << 8); + + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = BuildGraph(data, &allocator); + x86::CodeGeneratorX86 codegen(graph); + SsaLivenessAnalysis liveness(*graph, &codegen); + liveness.Analyze(); + + // Test for the 0 constant. + HIntConstant* constant = liveness.GetInstructionFromSsaIndex(0)->AsIntConstant(); + LiveInterval* interval = constant->GetLiveInterval(); + LiveRange* range = interval->GetFirstRange(); + ASSERT_EQ(2u, range->GetStart()); + // Last use is the loop phi so instruction is live until + // the end of the pre loop header. + ASSERT_EQ(10u, range->GetEnd()); + ASSERT_TRUE(range->GetNext() == nullptr); + + // Test for the loop phi. + HPhi* phi = liveness.GetInstructionFromSsaIndex(1)->AsPhi(); + interval = phi->GetLiveInterval(); + range = interval->GetFirstRange(); + ASSERT_EQ(10u, range->GetStart()); + ASSERT_EQ(21u, range->GetEnd()); + range = range->GetNext(); + ASSERT_TRUE(range != nullptr); + ASSERT_EQ(24u, range->GetStart()); + ASSERT_EQ(27u, range->GetEnd()); + + // Test for the add instruction. + HAdd* add = liveness.GetInstructionFromSsaIndex(2)->AsAdd(); + interval = add->GetLiveInterval(); + range = interval->GetFirstRange(); + ASSERT_EQ(20u, range->GetStart()); + ASSERT_EQ(24u, range->GetEnd()); + ASSERT_TRUE(range->GetNext() == nullptr); +} + +TEST(LiveRangesTest, CFG4) { + /* + * Test the following snippet: + * var a = 0; + * var b = 4; + * if (a == a) { + * a = b + a; + * } else { + * a = b + a + * } + * return b; + * + * Which becomes the following graph (numbered by lifetime position): + * 2: constant0 + * 4: constant4 + * 6: goto + * | + * 10: equal + * 12: if + * / \ + * 16: add 22: add + * 18: goto 24: goto + * \ / + * 26: phi + * 28: return + * | + * 32: exit + * + * We want to make sure the constant0 has a lifetime hole after the 16: add. + */ + const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::CONST_4 | 4 << 12 | 1 << 8, + Instruction::IF_EQ, 5, + Instruction::ADD_INT, 1 << 8, + Instruction::GOTO | 0x300, + Instruction::ADD_INT, 1 << 8, + Instruction::RETURN | 1 << 8); + + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = BuildGraph(data, &allocator); + x86::CodeGeneratorX86 codegen(graph); + SsaLivenessAnalysis liveness(*graph, &codegen); + liveness.Analyze(); + + // Test for the 0 constant. + LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); + LiveRange* range = interval->GetFirstRange(); + ASSERT_EQ(2u, range->GetStart()); + ASSERT_EQ(16u, range->GetEnd()); + range = range->GetNext(); + ASSERT_TRUE(range != nullptr); + ASSERT_EQ(20u, range->GetStart()); + ASSERT_EQ(22u, range->GetEnd()); + ASSERT_TRUE(range->GetNext() == nullptr); + + // Test for the 4 constant. + interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval(); + range = interval->GetFirstRange(); + ASSERT_EQ(4u, range->GetStart()); + ASSERT_EQ(29u, range->GetEnd()); + ASSERT_TRUE(range->GetNext() == nullptr); + + // Test for the first add. + HAdd* add = liveness.GetInstructionFromSsaIndex(2)->AsAdd(); + interval = add->GetLiveInterval(); + range = interval->GetFirstRange(); + ASSERT_EQ(16u, range->GetStart()); + ASSERT_EQ(20u, range->GetEnd()); + ASSERT_TRUE(range->GetNext() == nullptr); + + // Test for the second add. 
+ add = liveness.GetInstructionFromSsaIndex(3)->AsAdd(); + interval = add->GetLiveInterval(); + range = interval->GetFirstRange(); + ASSERT_EQ(22u, range->GetStart()); + ASSERT_EQ(26u, range->GetEnd()); + ASSERT_TRUE(range->GetNext() == nullptr); + + // Test for the phi, which is unused. + HPhi* phi = liveness.GetInstructionFromSsaIndex(4)->AsPhi(); + ASSERT_EQ(phi->NumberOfUses(), 0u); + interval = phi->GetLiveInterval(); + range = interval->GetFirstRange(); + ASSERT_EQ(26u, range->GetStart()); + ASSERT_EQ(28u, range->GetEnd()); + ASSERT_TRUE(range->GetNext() == nullptr); +} + } // namespace art diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 84b2e33ee7..2d861696bb 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -546,4 +546,51 @@ TEST(LivenessTest, Loop7) { TestCode(data, expected); } +TEST(LivenessTest, Loop8) { + // var a = 0; + // while (a == a) { + // a = a + a; + // } + // return a; + // + // We want to test that the ins of the loop exit + // does contain the phi. + // Bitsets are made of: + // (constant0, phi, add) + const char* expected = + "Block 0\n" + " live in: (000)\n" + " live out: (100)\n" + " kill: (100)\n" + "Block 1\n" // pre loop header + " live in: (100)\n" + " live out: (000)\n" + " kill: (000)\n" + "Block 2\n" // loop header + " live in: (000)\n" + " live out: (010)\n" + " kill: (010)\n" + "Block 3\n" // back edge + " live in: (010)\n" + " live out: (000)\n" + " kill: (001)\n" + "Block 4\n" // return block + " live in: (010)\n" + " live out: (000)\n" + " kill: (000)\n" + "Block 5\n" // exit block + " live in: (000)\n" + " live out: (000)\n" + " kill: (000)\n"; + + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_EQ, 6, + Instruction::ADD_INT, 0, 0, + Instruction::GOTO | 0xFB00, + Instruction::RETURN | 0 << 8); + + TestCode(data, expected); +} + } // namespace art diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 06623b6adc..f358e051ae 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -310,7 +310,7 @@ class LocationSummary : public ArenaObject { kCall }; - explicit LocationSummary(HInstruction* instruction, CallKind call_kind = kNoCall); + LocationSummary(HInstruction* instruction, CallKind call_kind = kNoCall); void SetInAt(uint32_t at, Location location) { inputs_.Put(at, location); @@ -363,6 +363,10 @@ class LocationSummary : public ArenaObject { stack_mask_->SetBit(index); } + void ClearStackBit(uint32_t index) { + stack_mask_->ClearBit(index); + } + void SetRegisterBit(uint32_t reg_id) { register_mask_ |= (1 << reg_id); } @@ -383,6 +387,16 @@ class LocationSummary : public ArenaObject { return &live_registers_; } + bool InputOverlapsWithOutputOrTemp(uint32_t input, bool is_environment) const { + if (is_environment) return true; + Location location = Out(); + // TODO: Add more policies. 
+ if (input == 0 && location.IsUnallocated() && location.GetPolicy() == Location::kSameAsFirstInput) { + return false; + } + return true; + } + private: GrowableArray<Location> inputs_; GrowableArray<Location> temps_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 09412a9c86..5c4ab8e4c0 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -141,7 +141,7 @@ void HGraph::TransformToSSA() { void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { // Insert a new node between `block` and `successor` to split the // critical edge. - HBasicBlock* new_block = new (arena_) HBasicBlock(this); + HBasicBlock* new_block = new (arena_) HBasicBlock(this, successor->GetDexPc()); AddBlock(new_block); new_block->AddInstruction(new (arena_) HGoto()); block->ReplaceSuccessor(successor, new_block); @@ -162,8 +162,10 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { // If there are more than one back edge, make them branch to the same block that // will become the only back edge. This simplifies finding natural loops in the // graph. - if (info->NumberOfBackEdges() > 1) { - HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this); + // Also, if the loop is a do/while (that is the back edge is an if), change the + // back edge to be a goto. This simplifies code generation of suspend checks. + if (info->NumberOfBackEdges() > 1 || info->GetBackEdges().Get(0)->GetLastInstruction()->IsIf()) { + HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this, header->GetDexPc()); AddBlock(new_back_edge); new_back_edge->AddInstruction(new (arena_) HGoto()); for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) { @@ -180,7 +182,7 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { // loop. size_t number_of_incomings = header->GetPredecessors().Size() - info->NumberOfBackEdges(); if (number_of_incomings != 1) { - HBasicBlock* pre_header = new (arena_) HBasicBlock(this); + HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); AddBlock(pre_header); pre_header->AddInstruction(new (arena_) HGoto()); @@ -200,6 +202,18 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { if (header->GetPredecessors().Get(1) != info->GetBackEdges().Get(0)) { header->SwapPredecessors(); } + + // Place the suspend check at the beginning of the header, so that live registers + // will be known when allocating registers. Note that code generation can still + // generate the suspend check at the back edge, but needs to be careful with + // loop phi spill slots (which are not written to at back edge).
+ HInstruction* first_instruction = header->GetFirstInstruction(); + if (!first_instruction->IsSuspendCheck()) { + HSuspendCheck* check = new (arena_) HSuspendCheck(header->GetDexPc()); + header->InsertInstructionBefore(check, first_instruction); + first_instruction = check; + } + info->SetSuspendCheck(first_instruction->AsSuspendCheck()); } void HGraph::SimplifyCFG() { @@ -427,6 +441,15 @@ void HInstructionList::RemoveInstruction(HInstruction* instruction) { } } +bool HInstructionList::Contains(HInstruction* instruction) const { + for (HInstructionIterator it(*this); !it.Done(); it.Advance()) { + if (it.Current() == instruction) { + return true; + } + } + return false; +} + bool HInstructionList::FoundBefore(const HInstruction* instruction1, const HInstruction* instruction2) const { DCHECK_EQ(instruction1->GetBlock(), instruction2->GetBlock()); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index be6b355d22..3d65366c43 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -32,6 +32,7 @@ class HInstruction; class HIntConstant; class HGraphVisitor; class HPhi; +class HSuspendCheck; class LiveInterval; class LocationSummary; @@ -57,6 +58,9 @@ class HInstructionList { void AddInstruction(HInstruction* instruction); void RemoveInstruction(HInstruction* instruction); + // Return true if this list contains `instruction`. + bool Contains(HInstruction* instruction) const; + // Return true if `instruction1` is found before `instruction2` in // this instruction list and false otherwise. Abort if none // of these instructions is found. @@ -198,6 +202,7 @@ class HLoopInformation : public ArenaObject { public: HLoopInformation(HBasicBlock* header, HGraph* graph) : header_(header), + suspend_check_(nullptr), back_edges_(graph->GetArena(), kDefaultNumberOfBackEdges), // Make bit vector growable, as the number of blocks may change. blocks_(graph->GetArena(), graph->GetBlocks().Size(), true) {} @@ -206,6 +211,10 @@ class HLoopInformation : public ArenaObject { return header_; } + HSuspendCheck* GetSuspendCheck() const { return suspend_check_; } + void SetSuspendCheck(HSuspendCheck* check) { suspend_check_ = check; } + bool HasSuspendCheck() const { return suspend_check_ != nullptr; } + void AddBackEdge(HBasicBlock* back_edge) { back_edges_.Add(back_edge); } @@ -254,6 +263,7 @@ class HLoopInformation : public ArenaObject { void PopulateRecursive(HBasicBlock* block); HBasicBlock* header_; + HSuspendCheck* suspend_check_; GrowableArray<HBasicBlock*> back_edges_; ArenaBitVector blocks_; @@ -261,13 +271,15 @@ class HLoopInformation : public ArenaObject { }; static constexpr size_t kNoLifetime = -1; +static constexpr uint32_t kNoDexPc = -1; // A block in a method. Contains the list of instructions represented // as a double linked list. Each block knows its predecessors and // successors. 
+ class HBasicBlock : public ArenaObject { public: - explicit HBasicBlock(HGraph* graph) + explicit HBasicBlock(HGraph* graph, uint32_t dex_pc = kNoDexPc) : graph_(graph), predecessors_(graph->GetArena(), kDefaultNumberOfPredecessors), successors_(graph->GetArena(), kDefaultNumberOfSuccessors), @@ -275,6 +287,7 @@ class HBasicBlock : public ArenaObject { dominator_(nullptr), dominated_blocks_(graph->GetArena(), kDefaultNumberOfDominatedBlocks), block_id_(-1), + dex_pc_(dex_pc), lifetime_start_(kNoLifetime), lifetime_end_(kNoLifetime) {} @@ -290,6 +303,14 @@ class HBasicBlock : public ArenaObject { return dominated_blocks_; } + bool IsEntryBlock() const { + return graph_->GetEntryBlock() == this; + } + + bool IsExitBlock() const { + return graph_->GetExitBlock() == this; + } + void AddBackEdge(HBasicBlock* back_edge) { if (loop_information_ == nullptr) { loop_information_ = new (graph_->GetArena()) HLoopInformation(this, graph_); @@ -423,6 +444,8 @@ class HBasicBlock : public ArenaObject { void SetLifetimeStart(size_t start) { lifetime_start_ = start; } void SetLifetimeEnd(size_t end) { lifetime_end_ = end; } + uint32_t GetDexPc() const { return dex_pc_; } + private: HGraph* const graph_; GrowableArray<HBasicBlock*> predecessors_; @@ -433,6 +456,8 @@ class HBasicBlock : public ArenaObject { HBasicBlock* dominator_; GrowableArray<HBasicBlock*> dominated_blocks_; int block_id_; + // The dex program counter of the first instruction of this block. + const uint32_t dex_pc_; size_t lifetime_start_; size_t lifetime_end_; @@ -1271,7 +1296,7 @@ class HLocal : public HTemplateInstruction<0> { // Load a given local. The local is an input of this instruction. class HLoadLocal : public HExpression<1> { public: - explicit HLoadLocal(HLocal* local, Primitive::Type type) + HLoadLocal(HLocal* local, Primitive::Type type) : HExpression(type, SideEffects::None()) { SetRawInputAt(0, local); } @@ -1589,7 +1614,7 @@ class HNullCheck : public HExpression<1> { class FieldInfo : public ValueObject { public: - explicit FieldInfo(MemberOffset field_offset, Primitive::Type field_type) + FieldInfo(MemberOffset field_offset, Primitive::Type field_type) : field_offset_(field_offset), field_type_(field_type) {} MemberOffset GetFieldOffset() const { return field_offset_; } @@ -1771,7 +1796,7 @@ class HTemporary : public HTemplateInstruction<0> { class HSuspendCheck : public HTemplateInstruction<0> { public: explicit HSuspendCheck(uint32_t dex_pc) - : HTemplateInstruction(SideEffects::ChangesSomething()), dex_pc_(dex_pc) {} + : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc) {} virtual bool NeedsEnvironment() const { return true; @@ -1789,8 +1814,8 @@ class HSuspendCheck : public HTemplateInstruction<0> { class MoveOperands : public ArenaObject { public: - MoveOperands(Location source, Location destination) - : source_(source), destination_(destination) {} + MoveOperands(Location source, Location destination, HInstruction* instruction) + : source_(source), destination_(destination), instruction_(instruction) {} Location GetSource() const { return source_; } Location GetDestination() const { return destination_; } @@ -1838,9 +1863,16 @@ class MoveOperands : public ArenaObject { return source_.IsInvalid(); } + HInstruction* GetInstruction() const { return instruction_; } + private: Location source_; Location destination_; + // The instruction this move is associated with. Null when this move is + // for moving an input in the expected locations of user (including a phi user).
+ // This is only used in debug mode, to ensure we do not connect interval siblings + // in the same parallel move. + HInstruction* instruction_; DISALLOW_COPY_AND_ASSIGN(MoveOperands); }; @@ -1853,6 +1885,12 @@ class HParallelMove : public HTemplateInstruction<0> { : HTemplateInstruction(SideEffects::None()), moves_(arena, kDefaultNumberOfMoves) {} void AddMove(MoveOperands* move) { + if (kIsDebugBuild && move->GetInstruction() != nullptr) { + for (size_t i = 0, e = moves_.Size(); i < e; ++i) { + DCHECK_NE(moves_.Get(i)->GetInstruction(), move->GetInstruction()) + << "Doing parallel moves for the same instruction."; + } + } moves_.Add(move); } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 702eba183c..65bdb18812 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -26,6 +26,7 @@ #include "driver/dex_compilation_unit.h" #include "graph_visualizer.h" #include "gvn.h" +#include "instruction_simplifier.h" #include "nodes.h" #include "register_allocator.h" #include "ssa_phi_elimination.h" @@ -261,6 +262,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite SsaRedundantPhiElimination(graph).Run(); SsaDeadPhiElimination(graph).Run(); + InstructionSimplifier(graph).Run(); GlobalValueNumberer(graph->GetArena(), graph).Run(); visualizer.DumpGraph(kGVNPassName); diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 093856d497..863e107ee6 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -71,7 +71,8 @@ static HParallelMove* BuildParallelMove(ArenaAllocator* allocator, for (size_t i = 0; i < number_of_moves; ++i) { moves->AddMove(new (allocator) MoveOperands( Location::RegisterLocation(ManagedRegister(operands[i][0])), - Location::RegisterLocation(ManagedRegister(operands[i][1])))); + Location::RegisterLocation(ManagedRegister(operands[i][1])), + nullptr)); } return moves; } diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 1ac9b78a7e..1d1d694ad2 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -205,7 +205,7 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { LiveInterval* current = instruction->GetLiveInterval(); if (current == nullptr) return; - DCHECK(unhandled.IsEmpty() || current->StartsBefore(unhandled.Peek())); + DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek())); // Some instructions define their output in fixed register/stack slot. We need // to ensure we know these locations before doing register allocation. For a // given register, we create an interval that covers these locations. The register @@ -228,7 +228,7 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { // Split before first register use. size_t first_register_use = current->FirstRegisterUse(); if (first_register_use != kNoLifetime) { - LiveInterval* split = Split(current, first_register_use - 1); + LiveInterval* split = Split(current, first_register_use); // Don't add direclty to `unhandled`, it needs to be sorted and the start // of this new interval might be after intervals already in the list. AddSorted(&unhandled, split); @@ -236,7 +236,7 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { // Nothing to do, we won't allocate a register for this value. 
} } else { - DCHECK(unhandled.IsEmpty() || current->StartsBefore(unhandled.Peek())); + DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek())); unhandled.Add(current); } } @@ -586,7 +586,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // If the first use of that instruction is after the last use of the found // register, we split this interval just before its first register use. AllocateSpillSlotFor(current); - LiveInterval* split = Split(current, first_register_use - 1); + LiveInterval* split = Split(current, first_register_use); AddSorted(unhandled_, split); return false; } else { @@ -685,14 +685,6 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { } size_t end = last_sibling->GetEnd(); - if (NeedTwoSpillSlot(parent->GetType())) { - AllocateTwoSpillSlots(parent, end); - } else { - AllocateOneSpillSlot(parent, end); - } -} - -void RegisterAllocator::AllocateTwoSpillSlots(LiveInterval* parent, size_t end) { // Find an available spill slot. size_t slot = 0; for (size_t e = spill_slots_.Size(); slot < e; ++slot) { @@ -706,35 +698,25 @@ void RegisterAllocator::AllocateTwoSpillSlots(LiveInterval* parent, size_t end) } } - if (slot == spill_slots_.Size()) { - // We need a new spill slot. - spill_slots_.Add(end); - spill_slots_.Add(end); - } else if (slot == spill_slots_.Size() - 1) { - spill_slots_.Put(slot, end); - spill_slots_.Add(end); - } else { - spill_slots_.Put(slot, end); - spill_slots_.Put(slot + 1, end); - } - - parent->SetSpillSlot((slot + reserved_out_slots_) * kVRegSize); -} - -void RegisterAllocator::AllocateOneSpillSlot(LiveInterval* parent, size_t end) { - // Find an available spill slot. - size_t slot = 0; - for (size_t e = spill_slots_.Size(); slot < e; ++slot) { - if (spill_slots_.Get(slot) <= parent->GetStart()) { - break; + if (NeedTwoSpillSlot(parent->GetType())) { + if (slot == spill_slots_.Size()) { + // We need a new spill slot. + spill_slots_.Add(end); + spill_slots_.Add(end); + } else if (slot == spill_slots_.Size() - 1) { + spill_slots_.Put(slot, end); + spill_slots_.Add(end); + } else { + spill_slots_.Put(slot, end); + spill_slots_.Put(slot + 1, end); } - } - - if (slot == spill_slots_.Size()) { - // We need a new spill slot. - spill_slots_.Add(end); } else { - spill_slots_.Put(slot, end); + if (slot == spill_slots_.Size()) { + // We need a new spill slot. 
+ spill_slots_.Add(end); + } else { + spill_slots_.Put(slot, end); + } } parent->SetSpillSlot((slot + reserved_out_slots_) * kVRegSize); @@ -771,30 +753,31 @@ static bool IsValidDestination(Location destination) { return destination.IsRegister() || destination.IsStackSlot() || destination.IsDoubleStackSlot(); } -void RegisterAllocator::AddInputMoveFor(HInstruction* instruction, +void RegisterAllocator::AddInputMoveFor(HInstruction* user, Location source, Location destination) const { DCHECK(IsValidDestination(destination)); if (source.Equals(destination)) return; - DCHECK(instruction->AsPhi() == nullptr); + DCHECK(user->AsPhi() == nullptr); - HInstruction* previous = instruction->GetPrevious(); + HInstruction* previous = user->GetPrevious(); HParallelMove* move = nullptr; if (previous == nullptr || previous->AsParallelMove() == nullptr || !IsInputMove(previous)) { move = new (allocator_) HParallelMove(allocator_); move->SetLifetimePosition(kInputMoveLifetimePosition); - instruction->GetBlock()->InsertInstructionBefore(move, instruction); + user->GetBlock()->InsertInstructionBefore(move, user); } else { move = previous->AsParallelMove(); } DCHECK(IsInputMove(move)); - move->AddMove(new (allocator_) MoveOperands(source, destination)); + move->AddMove(new (allocator_) MoveOperands(source, destination, nullptr)); } void RegisterAllocator::InsertParallelMoveAt(size_t position, + HInstruction* instruction, Location source, Location destination) const { DCHECK(IsValidDestination(destination)); @@ -820,7 +803,7 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, } else { // Move must happen before the instruction. HInstruction* previous = at->GetPrevious(); - if (previous != nullptr && previous->AsParallelMove() != nullptr) { + if (previous != nullptr && previous->IsParallelMove()) { // This is a parallel move for connecting siblings in a same block. We need to // differentiate it with moves for connecting blocks, and input moves. if (previous->GetLifetimePosition() != position) { @@ -831,7 +814,15 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, previous = previous->GetPrevious(); } } - if (previous == nullptr || previous->AsParallelMove() == nullptr) { + if (previous == nullptr + || !previous->IsParallelMove() + || previous->GetLifetimePosition() != position) { + // If the previous is a parallel move, then its position must be lower + // than the given `position`: it was added just after the non-parallel + // move instruction that precedes `instruction`. + DCHECK(previous == nullptr + || !previous->IsParallelMove() + || previous->GetLifetimePosition() < position); move = new (allocator_) HParallelMove(allocator_); move->SetLifetimePosition(position); at->GetBlock()->InsertInstructionBefore(move, at); @@ -839,10 +830,11 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, move = previous->AsParallelMove(); } } - move->AddMove(new (allocator_) MoveOperands(source, destination)); + move->AddMove(new (allocator_) MoveOperands(source, destination, instruction)); } void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, + HInstruction* instruction, Location source, Location destination) const { DCHECK(IsValidDestination(destination)); @@ -854,7 +846,7 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, HParallelMove* move; // This is a parallel move for connecting blocks. We need to differentiate // it with moves for connecting siblings in a same block, and output moves. 
- if (previous == nullptr || previous->AsParallelMove() == nullptr + if (previous == nullptr || !previous->IsParallelMove() || previous->AsParallelMove()->GetLifetimePosition() != block->GetLifetimeEnd()) { move = new (allocator_) HParallelMove(allocator_); move->SetLifetimePosition(block->GetLifetimeEnd()); @@ -862,10 +854,11 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, } else { move = previous->AsParallelMove(); } - move->AddMove(new (allocator_) MoveOperands(source, destination)); + move->AddMove(new (allocator_) MoveOperands(source, destination, instruction)); } void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, + HInstruction* instruction, Location source, Location destination) const { DCHECK(IsValidDestination(destination)); @@ -880,7 +873,7 @@ move->SetLifetimePosition(block->GetLifetimeStart()); block->InsertInstructionBefore(move, first); } - move->AddMove(new (allocator_) MoveOperands(source, destination)); + move->AddMove(new (allocator_) MoveOperands(source, destination, instruction)); } void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, @@ -890,7 +883,7 @@ if (source.Equals(destination)) return; if (instruction->AsPhi() != nullptr) { - InsertParallelMoveAtEntryOf(instruction->GetBlock(), source, destination); + InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination); return; } @@ -904,7 +897,7 @@ move->SetLifetimePosition(position); instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext()); } - move->AddMove(new (allocator_) MoveOperands(source, destination)); + move->AddMove(new (allocator_) MoveOperands(source, destination, instruction)); } void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { @@ -948,7 +941,7 @@ && next_sibling->HasRegister() && current->GetEnd() == next_sibling->GetStart()) { Location destination = ConvertToLocation(next_sibling); - InsertParallelMoveAt(current->GetEnd(), source, destination); + InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination); } // At each safepoint, we record stack and register information. @@ -995,7 +988,11 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, } size_t from_position = from->GetLifetimeEnd() - 1; - size_t to_position = to->GetLifetimeStart(); + // When an instruction dies at entry of another, and the latter is the beginning + // of a block, the register allocator ensures the former has a register + // at block->GetLifetimeStart() + 1. Since this is at a block boundary, it + // must be handled in this method. + size_t to_position = to->GetLifetimeStart() + 1; LiveInterval* destination = nullptr; LiveInterval* source = nullptr; @@ -1021,6 +1018,8 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, return; } + DCHECK(destination != nullptr && source != nullptr); + if (!destination->HasRegister()) { // Values are eagerly spilled. Spill slot already contains appropriate value. return; @@ -1029,10 +1028,16 @@ // If `from` has only one successor, we can put the moves at the exit of it. Otherwise // we need to put the moves at the entry of `to`.
if (from->GetSuccessors().Size() == 1) { - InsertParallelMoveAtExitOf(from, ConvertToLocation(source), ConvertToLocation(destination)); + InsertParallelMoveAtExitOf(from, + interval->GetParent()->GetDefinedBy(), + ConvertToLocation(source), + ConvertToLocation(destination)); } else { DCHECK_EQ(to->GetPredecessors().Size(), 1u); - InsertParallelMoveAtEntryOf(to, ConvertToLocation(source), ConvertToLocation(destination)); + InsertParallelMoveAtEntryOf(to, + interval->GetParent()->GetDefinedBy(), + ConvertToLocation(source), + ConvertToLocation(destination)); } } @@ -1115,7 +1120,7 @@ void RegisterAllocator::Resolve() { Location source = FindLocationAt(input->GetLiveInterval(), predecessor->GetLastInstruction()->GetLifetimePosition()); Location destination = ConvertToLocation(phi->GetLiveInterval()); - InsertParallelMoveAtExitOf(predecessor, source, destination); + InsertParallelMoveAtExitOf(predecessor, nullptr, source, destination); } } } diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 3c305c8f58..d4c233a7f8 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -100,8 +100,6 @@ class RegisterAllocator { // Allocate a spill slot for the given interval. void AllocateSpillSlotFor(LiveInterval* interval); - void AllocateOneSpillSlot(LiveInterval* interval, size_t end); - void AllocateTwoSpillSlots(LiveInterval* interval, size_t end); // Connect adjacent siblings within blocks. void ConnectSiblings(LiveInterval* interval); @@ -110,11 +108,20 @@ class RegisterAllocator { void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const; // Helper methods to insert parallel moves in the graph. - void InsertParallelMoveAtExitOf(HBasicBlock* block, Location source, Location destination) const; - void InsertParallelMoveAtEntryOf(HBasicBlock* block, Location source, Location destination) const; + void InsertParallelMoveAtExitOf(HBasicBlock* block, + HInstruction* instruction, + Location source, + Location destination) const; + void InsertParallelMoveAtEntryOf(HBasicBlock* block, + HInstruction* instruction, + Location source, + Location destination) const; void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const; - void AddInputMoveFor(HInstruction* instruction, Location source, Location destination) const; - void InsertParallelMoveAt(size_t position, Location source, Location destination) const; + void AddInputMoveFor(HInstruction* user, Location source, Location destination) const; + void InsertParallelMoveAt(size_t position, + HInstruction* instruction, + Location source, + Location destination) const; // Helper methods. void AllocateRegistersInternal(); diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 3e3b6b12a2..535a768ea1 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -339,7 +339,7 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) { HAdd* last_add = graph->GetBlocks().Get(1)->GetLastInstruction()->GetPrevious()->AsAdd(); ASSERT_EQ(last_add->InputAt(0), first_add); LiveInterval* interval = first_add->GetLiveInterval(); - ASSERT_EQ(interval->GetEnd(), last_add->GetLifetimePosition() + 1); + ASSERT_EQ(interval->GetEnd(), last_add->GetLifetimePosition()); ASSERT_TRUE(interval->GetNextSibling() == nullptr); // We need a register for the output of the instruction. 
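// Illustration only (not part of the patch): a minimal sketch of why the expected values in the
// test above shifted from `position + 1` to `position` and `position - 1`. With the liveness change
// further down in ssa_liveness_analysis.h, an input use is recorded at the user's own lifetime
// position (the input may die at entry of its user), and FirstRegisterUse() answers the position
// just before the user. The allocator then splits exactly there, mirroring the register_allocator.cc
// hunks above:
//
//   size_t first_register_use = current->FirstRegisterUse();    // == user position - 1
//   if (first_register_use != kNoLifetime) {
//     // Split at that position (previously at first_register_use - 1), so the new sibling
//     // is guaranteed a register when entering the user.
//     LiveInterval* split = Split(current, first_register_use);
//     AddSorted(&unhandled, split);
//   }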
@@ -348,14 +348,14 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) { // Split at the next instruction. interval = interval->SplitAt(first_add->GetLifetimePosition() + 2); // The user of the split is the last add. - ASSERT_EQ(interval->FirstRegisterUse(), last_add->GetLifetimePosition() + 1); + ASSERT_EQ(interval->FirstRegisterUse(), last_add->GetLifetimePosition() - 1); // Split before the last add. LiveInterval* new_interval = interval->SplitAt(last_add->GetLifetimePosition() - 1); // Ensure the current interval has no register use... ASSERT_EQ(interval->FirstRegisterUse(), kNoLifetime); // And the new interval has it for the last add. - ASSERT_EQ(new_interval->FirstRegisterUse(), last_add->GetLifetimePosition() + 1); + ASSERT_EQ(new_interval->FirstRegisterUse(), last_add->GetLifetimePosition() - 1); } TEST(RegisterAllocatorTest, DeadPhi) { diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 680cc0a033..cd13d81a36 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -189,6 +189,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { } // Add a range that covers this block to all instructions live_in because of successors. + // Instructions defined in this block will have their start of the range adjusted. for (uint32_t idx : live_in->Indexes()) { HInstruction* current = instructions_from_ssa_index_.Get(idx); current->GetLiveInterval()->AddRange(block->GetLifetimeStart(), block->GetLifetimeEnd()); diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index dea6181cb2..c62e61b2cd 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -76,7 +76,7 @@ class LiveRange : public ArenaObject { private: size_t start_; - const size_t end_; + size_t end_; LiveRange* next_; friend class LiveInterval; @@ -99,13 +99,16 @@ class UsePosition : public ArenaObject { is_environment_(is_environment), position_(position), next_(next) { - DCHECK(user->AsPhi() != nullptr || GetPosition() == user->GetLifetimePosition() + 1); + DCHECK(user->IsPhi() + || (GetPosition() == user->GetLifetimePosition() + 1) + || (GetPosition() == user->GetLifetimePosition())); DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition()); } size_t GetPosition() const { return position_; } UsePosition* GetNext() const { return next_; } + void SetNext(UsePosition* next) { next_ = next; } HInstruction* GetUser() const { return user_; } @@ -122,7 +125,7 @@ class UsePosition : public ArenaObject { const size_t input_index_; const bool is_environment_; const size_t position_; - UsePosition* const next_; + UsePosition* next_; DISALLOW_COPY_AND_ASSIGN(UsePosition); }; @@ -174,23 +177,46 @@ class LiveInterval : public ArenaObject { void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) { // Set the use within the instruction. - // TODO: Use the instruction's location to know whether the instruction can die - // at entry, or needs to say alive within the user. - size_t position = instruction->GetLifetimePosition() + 1; + size_t position = instruction->GetLifetimePosition(); + if (instruction->GetLocations()->InputOverlapsWithOutputOrTemp(input_index, is_environment)) { + // If it overlaps, we need to make sure the user will not try to allocate a temp + // or its output to the same register. 
+ ++position; + } + if ((first_use_ != nullptr) + && (first_use_->GetUser() == instruction) + && (first_use_->GetPosition() < position)) { + // The user uses the instruction multiple times, and one use dies before the other. + // We update the use list so that the latter is first. + DCHECK(first_use_->GetPosition() + 1 == position); + UsePosition* new_use = new (allocator_) UsePosition( + instruction, input_index, is_environment, position, first_use_->GetNext()); + first_use_->SetNext(new_use); + if (first_range_->GetEnd() == first_use_->GetPosition()) { + first_range_->end_ = position; + } + return; + } + size_t start_block_position = instruction->GetBlock()->GetLifetimeStart(); - size_t end_block_position = instruction->GetBlock()->GetLifetimeEnd(); if (first_range_ == nullptr) { // First time we see a use of that interval. - first_range_ = last_range_ = new (allocator_) LiveRange(start_block_position, position, nullptr); + first_range_ = last_range_ = new (allocator_) LiveRange( + start_block_position, position, nullptr); } else if (first_range_->GetStart() == start_block_position) { - // There is a use later in the same block. + // There is a use later in the same block or in a following block. + // Note that in such a case, `AddRange` for the whole blocks has been called + // before arriving in this method, and this is the reason the start of + // `first_range_` is before the given `position`. DCHECK_LE(position, first_range_->GetEnd()); - } else if (first_range_->GetStart() == end_block_position) { - // Last use is in the following block. - first_range_->start_ = start_block_position; } else { DCHECK(first_range_->GetStart() > position); // There is a hole in the interval. Create a new range. + // Note that the start of `first_range_` can be equal to `end`: two blocks + // having adjacent lifetime positions are not necessarily + // predecessor/successor. When two blocks are predecessor/successor, the + // liveness algorithm has called `AddRange` before arriving in this method, + // and the check line 205 would succeed. first_range_ = new (allocator_) LiveRange(start_block_position, position, first_range_); } first_use_ = new (allocator_) UsePosition( @@ -198,7 +224,7 @@ class LiveInterval : public ArenaObject { } void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) { - DCHECK(instruction->AsPhi() != nullptr); + DCHECK(instruction->IsPhi()); first_use_ = new (allocator_) UsePosition( instruction, input_index, false, block->GetLifetimeEnd(), first_use_); } @@ -339,7 +365,9 @@ class LiveInterval : public ArenaObject { if (use_position >= position && !use->GetIsEnvironment()) { Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex()); if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { - return use_position; + // Return the lifetime just before the user, so that the interval has a register + // when entering the user. 
+ return use->GetUser()->GetLifetimePosition() - 1; } } use = use->GetNext(); @@ -428,12 +456,12 @@ class LiveInterval : public ArenaObject { return nullptr; } - bool StartsBefore(LiveInterval* other) const { + bool StartsBeforeOrAt(LiveInterval* other) const { return GetStart() <= other->GetStart(); } bool StartsAfter(LiveInterval* other) const { - return GetStart() >= other->GetStart(); + return GetStart() > other->GetStart(); } void Dump(std::ostream& stream) const { diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index ad3b205830..fffe5c2b44 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -83,10 +83,10 @@ static void TestCode(const uint16_t* data, const char* expected) { HGraph* graph = builder.BuildGraph(*item); ASSERT_NE(graph, nullptr); + graph->BuildDominatorTree(); // Suspend checks implementation may change in the future, and this test relies // on how instructions are ordered. RemoveSuspendChecks(graph); - graph->BuildDominatorTree(); graph->TransformToSSA(); ReNumberInstructions(graph); diff --git a/compiler/utils/arena_allocator.cc b/compiler/utils/arena_allocator.cc index da49524ee2..516ac2b388 100644 --- a/compiler/utils/arena_allocator.cc +++ b/compiler/utils/arena_allocator.cc @@ -35,12 +35,23 @@ template <bool kCount> const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { "Misc ", "BasicBlock ", + "BBList " + "BBPreds ", + "DfsPreOrd ", + "DfsPostOrd ", + "DomPostOrd ", + "TopoOrd ", + "Lowering ", "LIR ", "LIR masks ", + "SwitchTbl ", + "FillArray ", + "SlowPaths ", "MIR ", "DataFlow ", "GrowList ", "GrowBitMap ", + "SSA2Dalvik ", "Dalvik2SSA ", "DebugInfo ", "Successor ", diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h index 7bfbb6f93b..b2f5ca9755 100644 --- a/compiler/utils/arena_allocator.h +++ b/compiler/utils/arena_allocator.h @@ -44,12 +44,23 @@ static constexpr bool kArenaAllocatorCountAllocations = false; enum ArenaAllocKind { kArenaAllocMisc, kArenaAllocBB, + kArenaAllocBBList, + kArenaAllocBBPredecessors, + kArenaAllocDfsPreOrder, + kArenaAllocDfsPostOrder, + kArenaAllocDomPostOrder, + kArenaAllocTopologicalSortOrder, + kArenaAllocLoweringInfo, kArenaAllocLIR, kArenaAllocLIRResourceMask, + kArenaAllocSwitchTable, + kArenaAllocFillArrayData, + kArenaAllocSlowPaths, kArenaAllocMIR, kArenaAllocDFInfo, kArenaAllocGrowableArray, kArenaAllocGrowableBitMap, + kArenaAllocSSAToDalvikMap, kArenaAllocDalvikToSSAMap, kArenaAllocDebugInfo, kArenaAllocSuccessor, diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index 671ccb642c..637a1ff297 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -130,7 +130,7 @@ uint32_t ShifterOperand::encodingThumb() const { return ROR << 4 | static_cast<uint32_t>(rm_); } else { uint32_t imm3 = immed_ >> 2; - uint32_t imm2 = immed_ & 0b11; + uint32_t imm2 = immed_ & 3U /* 0b11 */; return imm3 << 12 | imm2 << 6 | shift_ << 4 | static_cast<uint32_t>(rm_); @@ -229,8 +229,8 @@ uint32_t Address::encodingThumb(bool is_32bit) const { uint32_t PUW = am >> 21; // Move down to bottom of word. PUW = (PUW >> 1) | (PUW & 1); // Bits 3, 2 and 0. // If P is 0 then W must be 1 (Different from ARM). 
- if ((PUW & 0b100) == 0) { - PUW |= 0b1; + if ((PUW & 4U /* 0b100 */) == 0) { + PUW |= 1U /* 0b1 */; } encoding |= B11 | PUW << 8 | offset; } else { @@ -267,17 +267,17 @@ uint32_t Address::encodingThumbLdrdStrd() const { uint32_t am = am_; // If P is 0 then W must be 1 (Different from ARM). uint32_t PU1W = am_ >> 21; // Move down to bottom of word. - if ((PU1W & 0b1000) == 0) { + if ((PU1W & 8U /* 0b1000 */) == 0) { am |= 1 << 21; // Set W bit. } if (offset_ < 0) { int32_t off = -offset_; CHECK_LT(off, 1024); - CHECK_EQ((off & 0b11), 0); // Must be multiple of 4. + CHECK_EQ((off & 3 /* 0b11 */), 0); // Must be multiple of 4. encoding = (am ^ (1 << kUShift)) | off >> 2; // Flip U to adjust sign. } else { CHECK_LT(offset_, 1024); - CHECK_EQ((offset_ & 0b11), 0); // Must be multiple of 4. + CHECK_EQ((offset_ & 3 /* 0b11 */), 0); // Must be multiple of 4. encoding = am | offset_ >> 2; } encoding |= static_cast<uint32_t>(rn_) << 16; @@ -886,8 +886,8 @@ uint32_t ArmAssembler::ModifiedImmediate(uint32_t value) { /* Put it all together */ uint32_t v = 8 + z_leading; - uint32_t i = (v & 0b10000) >> 4; - uint32_t imm3 = (v >> 1) & 0b111; + uint32_t i = (v & 16U /* 0b10000 */) >> 4; + uint32_t imm3 = (v >> 1) & 7U /* 0b111 */; uint32_t a = v & 1; return value | i << 26 | imm3 << 12 | a << 7; } diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index 267bba8521..6af69c86ce 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -955,11 +955,11 @@ void Arm32Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, if (dbl) { // Encoded as D:Vd. D = (reg >> 4) & 1; - Vd = reg & 0b1111; + Vd = reg & 15U /* 0b1111 */; } else { // Encoded as Vd:D. D = reg & 1; - Vd = (reg >> 1) & 0b1111; + Vd = (reg >> 1) & 15U /* 0b1111 */; } int32_t encoding = B27 | B26 | B21 | B19 | B18 | B16 | B11 | B9 | diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 4904428489..7968a7774e 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -159,8 +159,8 @@ void Thumb2Assembler::mul(Register rd, Register rn, Register rm, Condition cond) Emit16(encoding); } else { // 32 bit. 
- uint32_t op1 = 0b000; - uint32_t op2 = 0b00; + uint32_t op1 = 0U /* 0b000 */; + uint32_t op2 = 0U /* 0b00 */; int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 | op1 << 20 | B15 | B14 | B13 | B12 | @@ -176,8 +176,8 @@ void Thumb2Assembler::mul(Register rd, Register rn, Register rm, Condition cond) void Thumb2Assembler::mla(Register rd, Register rn, Register rm, Register ra, Condition cond) { - uint32_t op1 = 0b000; - uint32_t op2 = 0b00; + uint32_t op1 = 0U /* 0b000 */; + uint32_t op2 = 0U /* 0b00 */; int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 | op1 << 20 | op2 << 4 | @@ -192,8 +192,8 @@ void Thumb2Assembler::mla(Register rd, Register rn, Register rm, Register ra, void Thumb2Assembler::mls(Register rd, Register rn, Register rm, Register ra, Condition cond) { - uint32_t op1 = 0b000; - uint32_t op2 = 0b01; + uint32_t op1 = 0U /* 0b000 */; + uint32_t op2 = 01 /* 0b01 */; int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 | op1 << 20 | op2 << 4 | @@ -208,8 +208,8 @@ void Thumb2Assembler::mls(Register rd, Register rn, Register rm, Register ra, void Thumb2Assembler::umull(Register rd_lo, Register rd_hi, Register rn, Register rm, Condition cond) { - uint32_t op1 = 0b010; - uint32_t op2 = 0b0000; + uint32_t op1 = 2U /* 0b010; */; + uint32_t op2 = 0U /* 0b0000 */; int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 | B23 | op1 << 20 | op2 << 4 | @@ -223,8 +223,8 @@ void Thumb2Assembler::umull(Register rd_lo, Register rd_hi, Register rn, void Thumb2Assembler::sdiv(Register rd, Register rn, Register rm, Condition cond) { - uint32_t op1 = 0b001; - uint32_t op2 = 0b1111; + uint32_t op1 = 1U /* 0b001 */; + uint32_t op2 = 15U /* 0b1111 */; int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 | B23 | B20 | op1 << 20 | op2 << 4 | @@ -238,8 +238,8 @@ void Thumb2Assembler::sdiv(Register rd, Register rn, Register rm, Condition cond void Thumb2Assembler::udiv(Register rd, Register rn, Register rm, Condition cond) { - uint32_t op1 = 0b001; - uint32_t op2 = 0b1111; + uint32_t op1 = 1U /* 0b001 */; + uint32_t op2 = 15U /* 0b1111 */; int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 | B23 | B21 | B20 | op1 << 20 | op2 << 4 | @@ -733,29 +733,29 @@ void Thumb2Assembler::Emit32BitDataProcessing(Condition cond, Register rn, Register rd, const ShifterOperand& so) { - uint8_t thumb_opcode = 0b11111111; + uint8_t thumb_opcode = 255U /* 0b11111111 */; switch (opcode) { - case AND: thumb_opcode = 0b0000; break; - case EOR: thumb_opcode = 0b0100; break; - case SUB: thumb_opcode = 0b1101; break; - case RSB: thumb_opcode = 0b1110; break; - case ADD: thumb_opcode = 0b1000; break; - case ADC: thumb_opcode = 0b1010; break; - case SBC: thumb_opcode = 0b1011; break; + case AND: thumb_opcode = 0U /* 0b0000 */; break; + case EOR: thumb_opcode = 4U /* 0b0100 */; break; + case SUB: thumb_opcode = 13U /* 0b1101 */; break; + case RSB: thumb_opcode = 14U /* 0b1110 */; break; + case ADD: thumb_opcode = 8U /* 0b1000 */; break; + case ADC: thumb_opcode = 10U /* 0b1010 */; break; + case SBC: thumb_opcode = 11U /* 0b1011 */; break; case RSC: break; - case TST: thumb_opcode = 0b0000; set_cc = true; rd = PC; break; - case TEQ: thumb_opcode = 0b0100; set_cc = true; rd = PC; break; - case CMP: thumb_opcode = 0b1101; set_cc = true; rd = PC; break; - case CMN: thumb_opcode = 0b1000; set_cc = true; rd = PC; break; - case ORR: thumb_opcode = 0b0010; break; - case MOV: thumb_opcode = 0b0010; rn = PC; break; - case BIC: thumb_opcode = 0b0001; break; - case MVN: thumb_opcode = 0b0011; rn = PC; 
break; + case TST: thumb_opcode = 0U /* 0b0000 */; set_cc = true; rd = PC; break; + case TEQ: thumb_opcode = 4U /* 0b0100 */; set_cc = true; rd = PC; break; + case CMP: thumb_opcode = 13U /* 0b1101 */; set_cc = true; rd = PC; break; + case CMN: thumb_opcode = 8U /* 0b1000 */; set_cc = true; rd = PC; break; + case ORR: thumb_opcode = 2U /* 0b0010 */; break; + case MOV: thumb_opcode = 2U /* 0b0010 */; rn = PC; break; + case BIC: thumb_opcode = 1U /* 0b0001 */; break; + case MVN: thumb_opcode = 3U /* 0b0011 */; rn = PC; break; default: break; } - if (thumb_opcode == 0b11111111) { + if (thumb_opcode == 255U /* 0b11111111 */) { LOG(FATAL) << "Invalid thumb2 opcode " << opcode; } @@ -764,14 +764,14 @@ void Thumb2Assembler::Emit32BitDataProcessing(Condition cond, // Check special cases. if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12))) { if (opcode == SUB) { - thumb_opcode = 0b0101; + thumb_opcode = 5U /* 0b0101 */; } else { thumb_opcode = 0; } uint32_t imm = so.GetImmediate(); uint32_t i = (imm >> 11) & 1; - uint32_t imm3 = (imm >> 8) & 0b111; + uint32_t imm3 = (imm >> 8) & 7U /* 0b111 */; uint32_t imm8 = imm & 0xff; encoding = B31 | B30 | B29 | B28 | B25 | @@ -817,9 +817,9 @@ void Thumb2Assembler::Emit16BitDataProcessing(Condition cond, Emit16BitAddSub(cond, opcode, set_cc, rn, rd, so); return; } - uint8_t thumb_opcode = 0b11111111; + uint8_t thumb_opcode = 255U /* 0b11111111 */; // Thumb1. - uint8_t dp_opcode = 0b01; + uint8_t dp_opcode = 1U /* 0b01 */; uint8_t opcode_shift = 6; uint8_t rd_shift = 0; uint8_t rn_shift = 3; @@ -839,13 +839,13 @@ void Thumb2Assembler::Emit16BitDataProcessing(Condition cond, rn = so.GetRegister(); switch (so.GetShift()) { - case LSL: thumb_opcode = 0b00; break; - case LSR: thumb_opcode = 0b01; break; - case ASR: thumb_opcode = 0b10; break; + case LSL: thumb_opcode = 0U /* 0b00 */; break; + case LSR: thumb_opcode = 1U /* 0b01 */; break; + case ASR: thumb_opcode = 2U /* 0b10 */; break; case ROR: // ROR doesn't allow immediates. - thumb_opcode = 0b111; - dp_opcode = 0b01; + thumb_opcode = 7U /* 0b111 */; + dp_opcode = 1U /* 0b01 */; opcode_shift = 6; use_immediate = false; break; @@ -860,68 +860,68 @@ void Thumb2Assembler::Emit16BitDataProcessing(Condition cond, } switch (opcode) { - case AND: thumb_opcode = 0b0000; break; - case EOR: thumb_opcode = 0b0001; break; + case AND: thumb_opcode = 0U /* 0b0000 */; break; + case EOR: thumb_opcode = 1U /* 0b0001 */; break; case SUB: break; - case RSB: thumb_opcode = 0b1001; break; + case RSB: thumb_opcode = 9U /* 0b1001 */; break; case ADD: break; - case ADC: thumb_opcode = 0b0101; break; - case SBC: thumb_opcode = 0b0110; break; + case ADC: thumb_opcode = 5U /* 0b0101 */; break; + case SBC: thumb_opcode = 6U /* 0b0110 */; break; case RSC: break; - case TST: thumb_opcode = 0b1000; rn = so.GetRegister(); break; + case TST: thumb_opcode = 8U /* 0b1000 */; rn = so.GetRegister(); break; case TEQ: break; case CMP: if (use_immediate) { // T2 encoding. dp_opcode = 0; opcode_shift = 11; - thumb_opcode = 0b101; + thumb_opcode = 5U /* 0b101 */; rd_shift = 8; rn_shift = 8; } else { - thumb_opcode = 0b1010; + thumb_opcode = 10U /* 0b1010 */; rd = rn; rn = so.GetRegister(); } break; case CMN: { - thumb_opcode = 0b1011; + thumb_opcode = 11U /* 0b1011 */; rd = rn; rn = so.GetRegister(); break; } - case ORR: thumb_opcode = 0b1100; break; + case ORR: thumb_opcode = 12U /* 0b1100 */; break; case MOV: dp_opcode = 0; if (use_immediate) { // T2 encoding. 
opcode_shift = 11; - thumb_opcode = 0b100; + thumb_opcode = 4U /* 0b100 */; rd_shift = 8; rn_shift = 8; } else { rn = so.GetRegister(); if (IsHighRegister(rn) || IsHighRegister(rd)) { // Special mov for high registers. - dp_opcode = 0b01; + dp_opcode = 1U /* 0b01 */; opcode_shift = 7; // Put the top bit of rd into the bottom bit of the opcode. - thumb_opcode = 0b0001100 | static_cast<uint32_t>(rd) >> 3; - rd = static_cast<Register>(static_cast<uint32_t>(rd) & 0b111); + thumb_opcode = 12U /* 0b0001100 */ | static_cast<uint32_t>(rd) >> 3; + rd = static_cast<Register>(static_cast<uint32_t>(rd) & 7U /* 0b111 */); } else { thumb_opcode = 0; } } break; - case BIC: thumb_opcode = 0b1110; break; - case MVN: thumb_opcode = 0b1111; rn = so.GetRegister(); break; + case BIC: thumb_opcode = 14U /* 0b1110 */; break; + case MVN: thumb_opcode = 15U /* 0b1111 */; rn = so.GetRegister(); break; default: break; } } - if (thumb_opcode == 0b11111111) { + if (thumb_opcode == 255U /* 0b11111111 */) { LOG(FATAL) << "Invalid thumb1 opcode " << opcode; } @@ -962,17 +962,17 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, Register rm = so.GetRegister(); if (rn == rd) { // Can use T2 encoding (allows 4 bit registers) - dp_opcode = 0b01; + dp_opcode = 1U /* 0b01 */; opcode_shift = 10; - thumb_opcode = 0b0001; + thumb_opcode = 1U /* 0b0001 */; // Make Rn also contain the top bit of rd. rn = static_cast<Register>(static_cast<uint32_t>(rm) | - (static_cast<uint32_t>(rd) & 0b1000) << 1); - rd = static_cast<Register>(static_cast<uint32_t>(rd) & 0b111); + (static_cast<uint32_t>(rd) & 8U /* 0b1000 */) << 1); + rd = static_cast<Register>(static_cast<uint32_t>(rd) & 7U /* 0b111 */); } else { // T1. opcode_shift = 9; - thumb_opcode = 0b01100; + thumb_opcode = 12U /* 0b01100 */; immediate = static_cast<uint32_t>(so.GetRegister()); use_immediate = true; immediate_shift = 6; @@ -981,11 +981,11 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, // Immediate. if (rd == SP && rn == SP) { // ADD sp, sp, #imm - dp_opcode = 0b10; - thumb_opcode = 0b11; + dp_opcode = 2U /* 0b10 */; + thumb_opcode = 3U /* 0b11 */; opcode_shift = 12; CHECK_LT(immediate, (1 << 9)); - CHECK_EQ((immediate & 0b11), 0); + CHECK_EQ((immediate & 3 /* 0b11 */), 0); // Remove rd and rn from instruction by orring it with immed and clearing bits. rn = R0; @@ -995,11 +995,11 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, immediate >>= 2; } else if (rd != SP && rn == SP) { // ADD rd, SP, #imm - dp_opcode = 0b10; - thumb_opcode = 0b101; + dp_opcode = 2U /* 0b10 */; + thumb_opcode = 5U /* 0b101 */; opcode_shift = 11; CHECK_LT(immediate, (1 << 10)); - CHECK_EQ((immediate & 0b11), 0); + CHECK_EQ((immediate & 3 /* 0b11 */), 0); // Remove rn from instruction. rn = R0; @@ -1009,12 +1009,12 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, } else if (rn != rd) { // Must use T1. opcode_shift = 9; - thumb_opcode = 0b01110; + thumb_opcode = 14U /* 0b01110 */; immediate_shift = 6; } else { // T2 encoding. opcode_shift = 11; - thumb_opcode = 0b110; + thumb_opcode = 6U /* 0b110 */; rd_shift = 8; rn_shift = 8; } @@ -1025,18 +1025,18 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, if (so.IsRegister()) { // T1. 
opcode_shift = 9; - thumb_opcode = 0b01101; + thumb_opcode = 13U /* 0b01101 */; immediate = static_cast<uint32_t>(so.GetRegister()); use_immediate = true; immediate_shift = 6; } else { if (rd == SP && rn == SP) { // SUB sp, sp, #imm - dp_opcode = 0b10; - thumb_opcode = 0b1100001; + dp_opcode = 2U /* 0b10 */; + thumb_opcode = 0x61 /* 0b1100001 */; opcode_shift = 7; CHECK_LT(immediate, (1 << 9)); - CHECK_EQ((immediate & 0b11), 0); + CHECK_EQ((immediate & 3 /* 0b11 */), 0); // Remove rd and rn from instruction by orring it with immed and clearing bits. rn = R0; @@ -1047,12 +1047,12 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, } else if (rn != rd) { // Must use T1. opcode_shift = 9; - thumb_opcode = 0b01111; + thumb_opcode = 15U /* 0b01111 */; immediate_shift = 6; } else { // T2 encoding. opcode_shift = 11; - thumb_opcode = 0b111; + thumb_opcode = 7U /* 0b111 */; rd_shift = 8; rn_shift = 8; } @@ -1094,11 +1094,11 @@ void Thumb2Assembler::EmitShift(Register rd, Register rm, Shift shift, uint8_t a if (IsHighRegister(rd) || IsHighRegister(rm) || shift == ROR || shift == RRX) { uint16_t opcode = 0; switch (shift) { - case LSL: opcode = 0b00; break; - case LSR: opcode = 0b01; break; - case ASR: opcode = 0b10; break; - case ROR: opcode = 0b11; break; - case RRX: opcode = 0b11; amount = 0; break; + case LSL: opcode = 0U /* 0b00 */; break; + case LSR: opcode = 1U /* 0b01 */; break; + case ASR: opcode = 2U /* 0b10 */; break; + case ROR: opcode = 3U /* 0b11 */; break; + case RRX: opcode = 3U /* 0b11 */; amount = 0; break; default: LOG(FATAL) << "Unsupported thumb2 shift opcode"; } @@ -1106,7 +1106,7 @@ void Thumb2Assembler::EmitShift(Register rd, Register rm, Shift shift, uint8_t a int32_t encoding = B31 | B30 | B29 | B27 | B25 | B22 | 0xf << 16 | (setcc ? 
B20 : 0); uint32_t imm3 = amount >> 2; - uint32_t imm2 = amount & 0b11; + uint32_t imm2 = amount & 3U /* 0b11 */; encoding |= imm3 << 12 | imm2 << 6 | static_cast<int16_t>(rm) | static_cast<int16_t>(rd) << 8 | opcode << 4; Emit32(encoding); @@ -1114,9 +1114,9 @@ void Thumb2Assembler::EmitShift(Register rd, Register rm, Shift shift, uint8_t a // 16 bit shift uint16_t opcode = 0; switch (shift) { - case LSL: opcode = 0b00; break; - case LSR: opcode = 0b01; break; - case ASR: opcode = 0b10; break; + case LSL: opcode = 0U /* 0b00 */; break; + case LSR: opcode = 1U /* 0b01 */; break; + case ASR: opcode = 2U /* 0b10 */; break; default: LOG(FATAL) << "Unsupported thumb2 shift opcode"; } @@ -1136,10 +1136,10 @@ void Thumb2Assembler::EmitShift(Register rd, Register rn, Shift shift, Register if (must_be_32bit) { uint16_t opcode = 0; switch (shift) { - case LSL: opcode = 0b00; break; - case LSR: opcode = 0b01; break; - case ASR: opcode = 0b10; break; - case ROR: opcode = 0b11; break; + case LSL: opcode = 0U /* 0b00 */; break; + case LSR: opcode = 1U /* 0b01 */; break; + case ASR: opcode = 2U /* 0b10 */; break; + case ROR: opcode = 3U /* 0b11 */; break; default: LOG(FATAL) << "Unsupported thumb2 shift opcode"; } @@ -1152,9 +1152,9 @@ void Thumb2Assembler::EmitShift(Register rd, Register rn, Shift shift, Register } else { uint16_t opcode = 0; switch (shift) { - case LSL: opcode = 0b0010; break; - case LSR: opcode = 0b0011; break; - case ASR: opcode = 0b0100; break; + case LSL: opcode = 2U /* 0b0010 */; break; + case LSR: opcode = 3U /* 0b0011 */; break; + case ASR: opcode = 4U /* 0b0100 */; break; default: LOG(FATAL) << "Unsupported thumb2 shift opcode"; } @@ -1204,7 +1204,7 @@ void Thumb2Assembler::Branch::Emit(AssemblerBuffer* buffer) const { if (IsCompareAndBranch()) { offset -= 4; uint16_t i = (offset >> 6) & 1; - uint16_t imm5 = (offset >> 1) & 0b11111; + uint16_t imm5 = (offset >> 1) & 31U /* 0b11111 */; int16_t encoding = B15 | B13 | B12 | (type_ == kCompareAndBranchNonZero ? B11 : 0) | static_cast<uint32_t>(rn_) | @@ -1304,15 +1304,15 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, bool sp_relative = false; if (byte) { - opA = 0b0111; + opA = 7U /* 0b0111 */; } else if (half) { - opA = 0b1000; + opA = 8U /* 0b1000 */; } else { if (rn == SP) { - opA = 0b1001; + opA = 9U /* 0b1001 */; sp_relative = true; } else { - opA = 0b0110; + opA = 6U /* 0b0110 */; } } int16_t encoding = opA << 12 | @@ -1322,7 +1322,7 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, if (sp_relative) { // SP relative, 10 bit offset. CHECK_LT(offset, (1 << 10)); - CHECK_EQ((offset & 0b11), 0); + CHECK_EQ((offset & 3 /* 0b11 */), 0); encoding |= rd << 8 | offset >> 2; } else { // No SP relative. The offset is shifted right depending on @@ -1335,12 +1335,12 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, } else if (half) { // 6 bit offset, shifted by 1. CHECK_LT(offset, (1 << 6)); - CHECK_EQ((offset & 0b1), 0); + CHECK_EQ((offset & 1 /* 0b1 */), 0); offset >>= 1; } else { // 7 bit offset, shifted by 2. 
CHECK_LT(offset, (1 << 7)); - CHECK_EQ((offset & 0b11), 0); + CHECK_EQ((offset & 3 /* 0b11 */), 0); offset >>= 2; } encoding |= rn << 3 | offset << 6; @@ -1428,11 +1428,11 @@ void Thumb2Assembler::EmitMultiMemOp(Condition cond, switch (am) { case IA: case IA_W: - op = 0b01; + op = 1U /* 0b01 */; break; case DB: case DB_W: - op = 0b10; + op = 2U /* 0b10 */; break; case DA: case IB: @@ -1534,9 +1534,9 @@ void Thumb2Assembler::movw(Register rd, uint16_t imm16, Condition cond) { if (must_be_32bit) { // Use encoding T3. - uint32_t imm4 = (imm16 >> 12) & 0b1111; - uint32_t i = (imm16 >> 11) & 0b1; - uint32_t imm3 = (imm16 >> 8) & 0b111; + uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */; + uint32_t i = (imm16 >> 11) & 1U /* 0b1 */; + uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */; uint32_t imm8 = imm16 & 0xff; int32_t encoding = B31 | B30 | B29 | B28 | B25 | B22 | @@ -1557,9 +1557,9 @@ void Thumb2Assembler::movw(Register rd, uint16_t imm16, Condition cond) { void Thumb2Assembler::movt(Register rd, uint16_t imm16, Condition cond) { CheckCondition(cond); // Always 32 bits. - uint32_t imm4 = (imm16 >> 12) & 0b1111; - uint32_t i = (imm16 >> 11) & 0b1; - uint32_t imm3 = (imm16 >> 8) & 0b111; + uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */; + uint32_t i = (imm16 >> 11) & 1U /* 0b1 */; + uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */; uint32_t imm8 = imm16 & 0xff; int32_t encoding = B31 | B30 | B29 | B28 | B25 | B23 | B22 | @@ -1638,9 +1638,9 @@ void Thumb2Assembler::clrex(Condition cond) { void Thumb2Assembler::nop(Condition cond) { CheckCondition(cond); - int16_t encoding = B15 | B13 | B12 | + uint16_t encoding = B15 | B13 | B12 | B11 | B10 | B9 | B8; - Emit16(encoding); + Emit16(static_cast<int16_t>(encoding)); } @@ -1840,17 +1840,17 @@ void Thumb2Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, if (dbl) { // Encoded as D:Vd. D = (reg >> 4) & 1; - Vd = reg & 0b1111; + Vd = reg & 15U /* 0b1111 */; } else { // Encoded as Vd:D. D = reg & 1; - Vd = (reg >> 1) & 0b1111; + Vd = (reg >> 1) & 15U /* 0b1111 */; } int32_t encoding = B27 | B26 | B21 | B19 | B18 | B16 | B11 | B9 | (dbl ? B8 : 0) | (push ? B24 : (B23 | B20)) | - 0b1110 << 28 | + 14U /* 0b1110 */ << 28 | nregs << (dbl ? 1 : 0) | D << 22 | Vd << 12; @@ -1992,7 +1992,7 @@ void Thumb2Assembler::it(Condition firstcond, ItState i1, ItState i2, ItState i3 mask |= ToItMask(i3, firstcond0, 1); SetItCondition(i3, firstcond, 3); if (i3 != kItOmitted) { - mask |= 0b0001; + mask |= 1U /* 0b0001 */; } } } diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 3edf59be2a..c82b4f0f50 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -299,6 +299,10 @@ void Arm64Assembler::LoadRef(ManagedRegister m_dst, ManagedRegister m_base, CHECK(dst.IsCoreRegister() && base.IsCoreRegister()); LoadWFromOffset(kLoadWord, dst.AsOverlappingCoreRegisterLow(), base.AsCoreRegister(), offs.Int32Value()); + if (kPoisonHeapReferences) { + WRegister ref_reg = dst.AsOverlappingCoreRegisterLow(); + ___ Neg(reg_w(ref_reg), vixl::Operand(reg_w(ref_reg))); + } } void Arm64Assembler::LoadRawPtr(ManagedRegister m_dst, ManagedRegister m_base, Offset offs) { diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index 788950b0b4..bf89d24bd5 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -33,7 +33,7 @@ namespace art { namespace arm64 { -#define MEM_OP(x...) 
vixl::MemOperand(x) +#define MEM_OP(...) vixl::MemOperand(__VA_ARGS__) enum LoadOperandType { kLoadSignedByte, diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h index 2d70b7dd31..e6b4a6a47c 100644 --- a/compiler/utils/array_ref.h +++ b/compiler/utils/array_ref.h @@ -82,12 +82,13 @@ class ArrayRef { : array_(array), size_(size) { } - explicit ArrayRef(std::vector<T>& v) + template <typename Alloc> + explicit ArrayRef(std::vector<T, Alloc>& v) : array_(v.data()), size_(v.size()) { } - template <typename U> - ArrayRef(const std::vector<U>& v, + template <typename U, typename Alloc> + ArrayRef(const std::vector<U, Alloc>& v, typename std::enable_if<std::is_same<T, const U>::value, tag>::tag t = tag()) : array_(v.data()), size_(v.size()) { } @@ -167,6 +168,16 @@ class ArrayRef { size_t size_; }; +template <typename T> +bool operator==(const ArrayRef<T>& lhs, const ArrayRef<T>& rhs) { + return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin()); +} + +template <typename T> +bool operator!=(const ArrayRef<T>& lhs, const ArrayRef<T>& rhs) { + return !(lhs == rhs); +} + } // namespace art diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h index a1a3312576..61e420c222 100644 --- a/compiler/utils/growable_array.h +++ b/compiler/utils/growable_array.h @@ -26,61 +26,14 @@ namespace art { // Type of growable list for memory tuning. enum OatListKind { kGrowableArrayMisc = 0, - kGrowableArrayBlockList, - kGrowableArraySSAtoDalvikMap, - kGrowableArrayDfsOrder, - kGrowableArrayDfsPostOrder, - kGrowableArrayDomPostOrderTraversal, - kGrowableArraySwitchTables, - kGrowableArrayFillArrayData, - kGrowableArraySuccessorBlocks, - kGrowableArrayPredecessors, - kGrowableArraySlowPaths, kGNumListKinds }; +// Deprecated +// TODO: Replace all uses with ArenaVector<T>. template<typename T> class GrowableArray { public: - class Iterator { - public: - explicit Iterator(GrowableArray* g_list) - : idx_(0), - g_list_(g_list) {} - - explicit Iterator() - : idx_(0), - g_list_(nullptr) {} - - // NOTE: returns 0/NULL when no next. - // TODO: redo to make usage consistent with other iterators. - T Next() { - DCHECK(g_list_ != nullptr); - if (idx_ >= g_list_->Size()) { - return 0; - } else { - return g_list_->Get(idx_++); - } - } - - void Reset() { - idx_ = 0; - } - - void Reset(GrowableArray* g_list) { - idx_ = 0; - g_list_ = g_list; - } - - size_t GetIndex() const { - return idx_; - } - - private: - size_t idx_; - GrowableArray* g_list_; - }; - GrowableArray(ArenaAllocator* arena, size_t init_length, OatListKind kind = kGrowableArrayMisc) : arena_(arena), num_allocated_(init_length), @@ -88,7 +41,7 @@ class GrowableArray { kind_(kind) { elem_list_ = static_cast<T*>(arena_->Alloc(sizeof(T) * init_length, kArenaAllocGrowableArray)); - }; + } // Expand the list size to at least new length. @@ -105,7 +58,7 @@ class GrowableArray { memcpy(new_array, elem_list_, sizeof(T) * num_allocated_); num_allocated_ = target_length; elem_list_ = new_array; - }; + } // NOTE: does not return storage, just resets use count. void Reset() { @@ -136,7 +89,7 @@ class GrowableArray { T Get(size_t index) const { DCHECK_LT(index, num_used_); return elem_list_[index]; - }; + } // Overwrite existing element at position index. List must be large enough. void Put(size_t index, T elem) { @@ -167,14 +120,14 @@ class GrowableArray { // We should either have found the element, or it was the last (unscanned) element. 
DCHECK(found || (element == elem_list_[num_used_ - 1])); num_used_--; - }; + } void DeleteAt(size_t index) { for (size_t i = index; i < num_used_ - 1; i++) { elem_list_[i] = elem_list_[i + 1]; } num_used_--; - }; + } size_t GetNumAllocated() const { return num_allocated_; } @@ -201,7 +154,7 @@ class GrowableArray { static void* operator new(size_t size, ArenaAllocator* arena) { return arena->Alloc(sizeof(GrowableArray<T>), kArenaAllocGrowableArray); - }; + } static void operator delete(void* p) {} // Nop. private: diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 2c9bc28923..f888d46dd1 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -746,6 +746,7 @@ void X86Assembler::xchgl(Register dst, Register src) { EmitRegisterOperand(dst, src); } + void X86Assembler::xchgl(Register reg, const Address& address) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x87); @@ -753,6 +754,13 @@ void X86Assembler::xchgl(Register reg, const Address& address) { } +void X86Assembler::cmpw(const Address& address, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitComplex(7, address, imm); +} + + void X86Assembler::cmpl(Register reg, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitComplex(7, Operand(reg), imm); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 5c4e34fc8b..ec983d9a50 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -337,6 +337,8 @@ class X86Assembler FINAL : public Assembler { void xchgl(Register dst, Register src); void xchgl(Register reg, const Address& address); + void cmpw(const Address& address, const Immediate& imm); + void cmpl(Register reg, const Immediate& imm); void cmpl(Register reg0, Register reg1); void cmpl(Register reg, const Address& address); diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 1e2884a88c..1dcd4b31d2 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -839,6 +839,14 @@ void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) { } +void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(address); + EmitUint8(0x66); + EmitComplex(7, address, imm); +} + + void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(reg); @@ -1984,6 +1992,9 @@ void X86_64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, X86_64ManagedRegister dest = mdest.AsX86_64(); CHECK(dest.IsCpuRegister() && dest.IsCpuRegister()); movq(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs)); + if (kPoisonHeapReferences) { + negl(dest.AsCpuRegister()); + } } void X86_64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, @@ -2268,4 +2279,3 @@ void X86_64ExceptionSlowPath::Emit(Assembler *sasm) { } // namespace x86_64 } // namespace art - diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 763dafe7c8..1fd65c2c7c 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -378,6 +378,8 @@ class X86_64Assembler FINAL : public Assembler { void xchgq(CpuRegister dst, CpuRegister src); void xchgl(CpuRegister reg, const Address& 
address); + void cmpw(const Address& address, const Immediate& imm); + void cmpl(CpuRegister reg, const Immediate& imm); void cmpl(CpuRegister reg0, CpuRegister reg1); void cmpl(CpuRegister reg, const Address& address); diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index afc01dce2e..09825e2cd0 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -45,7 +45,6 @@ #include "driver/compiler_driver.h" #include "driver/compiler_options.h" #include "elf_fixup.h" -#include "elf_patcher.h" #include "elf_stripper.h" #include "gc/space/image_space.h" #include "gc/space/space-inl.h" @@ -261,12 +260,12 @@ class Dex2Oat { CHECK(verification_results != nullptr); CHECK(method_inliner_map != nullptr); std::unique_ptr<Dex2Oat> dex2oat(new Dex2Oat(&compiler_options, - compiler_kind, - instruction_set, - instruction_set_features, - verification_results, - method_inliner_map, - thread_count)); + compiler_kind, + instruction_set, + instruction_set_features, + verification_results, + method_inliner_map, + thread_count)); if (!dex2oat->CreateRuntime(runtime_options, instruction_set)) { *p_dex2oat = nullptr; return false; @@ -341,39 +340,16 @@ class Dex2Oat { return ReadImageClasses(image_classes_stream); } - bool PatchOatCode(const CompilerDriver* compiler_driver, File* oat_file, - const std::string& oat_location, std::string* error_msg) { - // We asked to include patch information but we are not making an image. We need to fix - // everything up manually. - std::unique_ptr<ElfFile> elf_file(ElfFile::Open(oat_file, PROT_READ|PROT_WRITE, - MAP_SHARED, error_msg)); - if (elf_file.get() == NULL) { - LOG(ERROR) << error_msg; - return false; - } - { - ReaderMutexLock mu(Thread::Current(), *Locks::mutator_lock_); - return ElfPatcher::Patch(compiler_driver, elf_file.get(), oat_location, error_msg); - } - } - - const CompilerDriver* CreateOatFile(const std::string& boot_image_option, - const std::string& android_root, - bool is_host, - const std::vector<const DexFile*>& dex_files, - File* oat_file, - const std::string& oat_location, - const std::string& bitcode_filename, - bool image, - std::unique_ptr<std::set<std::string>>& image_classes, - bool dump_stats, - bool dump_passes, - TimingLogger& timings, - CumulativeLogger& compiler_phases_timings, - std::string profile_file, - SafeMap<std::string, std::string>* key_value_store) { - CHECK(key_value_store != nullptr); - + void Compile(const std::string& boot_image_option, + const std::vector<const DexFile*>& dex_files, + const std::string& bitcode_filename, + bool image, + std::unique_ptr<std::set<std::string>>& image_classes, + bool dump_stats, + bool dump_passes, + TimingLogger* timings, + CumulativeLogger* compiler_phases_timings, + const std::string& profile_file) { // Handle and ClassLoader creation needs to come after Runtime::Create jobject class_loader = nullptr; Thread* self = Thread::Current(); @@ -392,31 +368,45 @@ class Dex2Oat { Runtime::Current()->SetCompileTimeClassPath(class_loader, class_path_files); } - std::unique_ptr<CompilerDriver> driver(new CompilerDriver(compiler_options_, - verification_results_, - method_inliner_map_, - compiler_kind_, - instruction_set_, - instruction_set_features_, - image, - image_classes.release(), - thread_count_, - dump_stats, - dump_passes, - &compiler_phases_timings, - profile_file)); - - driver->GetCompiler()->SetBitcodeFileName(*driver.get(), bitcode_filename); - - driver->CompileAll(class_loader, dex_files, &timings); - - TimingLogger::ScopedTiming t2("dex2oat OatWriter", &timings); + driver_.reset(new 
CompilerDriver(compiler_options_, + verification_results_, + method_inliner_map_, + compiler_kind_, + instruction_set_, + instruction_set_features_, + image, + image_classes.release(), + thread_count_, + dump_stats, + dump_passes, + compiler_phases_timings, + profile_file)); + + driver_->GetCompiler()->SetBitcodeFileName(*driver_, bitcode_filename); + + driver_->CompileAll(class_loader, dex_files, timings); + } + + void PrepareImageWriter(uintptr_t image_base) { + image_writer_.reset(new ImageWriter(*driver_, image_base)); + } + + bool CreateOatFile(const std::vector<const DexFile*>& dex_files, + const std::string& android_root, + bool is_host, + File* oat_file, + const std::string& oat_location, + TimingLogger* timings, + SafeMap<std::string, std::string>* key_value_store) { + CHECK(key_value_store != nullptr); + + TimingLogger::ScopedTiming t2("dex2oat OatWriter", timings); std::string image_file_location; uint32_t image_file_location_oat_checksum = 0; uintptr_t image_file_location_oat_data_begin = 0; int32_t image_patch_delta = 0; - if (!driver->IsImage()) { - TimingLogger::ScopedTiming t3("Loading image checksum", &timings); + if (!driver_->IsImage()) { + TimingLogger::ScopedTiming t3("Loading image checksum", timings); gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace(); image_file_location_oat_checksum = image_space->GetImageHeader().GetOatChecksum(); image_file_location_oat_data_begin = @@ -432,49 +422,50 @@ class Dex2Oat { OatWriter oat_writer(dex_files, image_file_location_oat_checksum, image_file_location_oat_data_begin, image_patch_delta, - driver.get(), - &timings, + driver_.get(), + image_writer_.get(), + timings, key_value_store); + if (driver_->IsImage()) { + // The OatWriter constructor has already updated offsets in methods and we need to + // prepare method offsets in the image address space for direct method patching. + t2.NewTiming("Preparing image address space"); + if (!image_writer_->PrepareImageAddressSpace()) { + LOG(ERROR) << "Failed to prepare image address space."; + return false; + } + } + t2.NewTiming("Writing ELF"); - if (!driver->WriteElf(android_root, is_host, dex_files, &oat_writer, oat_file)) { + if (!driver_->WriteElf(android_root, is_host, dex_files, &oat_writer, oat_file)) { LOG(ERROR) << "Failed to write ELF file " << oat_file->GetPath(); - return nullptr; + return false; } - // Flush result to disk. Patching code will re-open the file (mmap), so ensure that our view - // of the file already made it there and won't be re-ordered with writes from PatchOat or - // image patching. - oat_file->Flush(); - - if (!driver->IsImage() && driver->GetCompilerOptions().GetIncludePatchInformation()) { - t2.NewTiming("Patching ELF"); - std::string error_msg; - if (!PatchOatCode(driver.get(), oat_file, oat_location, &error_msg)) { - LOG(ERROR) << "Failed to fixup ELF file " << oat_file->GetPath() << ": " << error_msg; - return nullptr; - } + // Flush result to disk. 
+ t2.NewTiming("Flushing ELF"); + if (oat_file->Flush() != 0) { + LOG(ERROR) << "Failed to flush ELF file " << oat_file->GetPath(); + return false; } - return driver.release(); + return true; } bool CreateImageFile(const std::string& image_filename, - uintptr_t image_base, const std::string& oat_filename, - const std::string& oat_location, - const CompilerDriver& compiler) + const std::string& oat_location) LOCKS_EXCLUDED(Locks::mutator_lock_) { - uintptr_t oat_data_begin; - { - // ImageWriter is scoped so it can free memory before doing FixupElf - ImageWriter image_writer(compiler); - if (!image_writer.Write(image_filename, image_base, oat_filename, oat_location)) { - LOG(ERROR) << "Failed to create image file " << image_filename; - return false; - } - oat_data_begin = image_writer.GetOatDataBegin(); + CHECK(image_writer_ != nullptr); + if (!image_writer_->Write(image_filename, oat_filename, oat_location)) { + LOG(ERROR) << "Failed to create image file " << image_filename; + return false; } + uintptr_t oat_data_begin = image_writer_->GetOatDataBegin(); + + // Destroy ImageWriter before doing FixupElf. + image_writer_.reset(); std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str())); if (oat_file.get() == nullptr) { @@ -504,7 +495,9 @@ class Dex2Oat { method_inliner_map_(method_inliner_map), runtime_(nullptr), thread_count_(thread_count), - start_ns_(NanoTime()) { + start_ns_(NanoTime()), + driver_(nullptr), + image_writer_(nullptr) { CHECK(compiler_options != nullptr); CHECK(verification_results != nullptr); CHECK(method_inliner_map != nullptr); @@ -571,6 +564,8 @@ class Dex2Oat { Runtime* runtime_; size_t thread_count_; uint64_t start_ns_; + std::unique_ptr<CompilerDriver> driver_; + std::unique_ptr<ImageWriter> image_writer_; DISALLOW_IMPLICIT_CONSTRUCTORS(Dex2Oat); }; @@ -1252,6 +1247,7 @@ static int dex2oat(int argc, char** argv) { } else { oat_file.reset(new File(oat_fd, oat_location)); oat_file->DisableAutoClose(); + oat_file->SetLength(0); } if (oat_file.get() == nullptr) { PLOG(ERROR) << "Failed to create oat file: " << oat_location; @@ -1420,22 +1416,28 @@ static int dex2oat(int argc, char** argv) { oss << kRuntimeISA; key_value_store->Put(OatHeader::kDex2OatHostKey, oss.str()); - std::unique_ptr<const CompilerDriver> compiler(dex2oat->CreateOatFile(boot_image_option, - android_root, - is_host, - dex_files, - oat_file.get(), - oat_location, - bitcode_filename, - image, - image_classes, - dump_stats, - dump_passes, - timings, - compiler_phases_timings, - profile_file, - key_value_store.get())); - if (compiler.get() == nullptr) { + dex2oat->Compile(boot_image_option, + dex_files, + bitcode_filename, + image, + image_classes, + dump_stats, + dump_passes, + &timings, + &compiler_phases_timings, + profile_file); + + if (image) { + dex2oat->PrepareImageWriter(image_base); + } + + if (!dex2oat->CreateOatFile(dex_files, + android_root, + is_host, + oat_file.get(), + oat_location, + &timings, + key_value_store.get())) { LOG(ERROR) << "Failed to create oat file: " << oat_location; return EXIT_FAILURE; } @@ -1471,34 +1473,43 @@ static int dex2oat(int argc, char** argv) { // // To get this all correct, we go through several steps. // - // 1. We have already created that oat file above with - // CreateOatFile. Originally this was just our own proprietary file - // but now it is contained within an ELF dynamic object (aka an .so - // file). 
The Compiler returned by CreateOatFile provides - // PatchInformation for references to oat code and Methods that need - // to be update once we know where the oat file will be located - // after the image. + // 1. We prepare offsets for all data in the oat file and calculate + // the oat data size and code size. During this stage, we also set + // oat code offsets in methods for use by the image writer. + // + // 2. We prepare offsets for the objects in the image and calculate + // the image size. // - // 2. We create the image file. It needs to know where the oat file + // 3. We create the oat file. Originally this was just our own proprietary + // file but now it is contained within an ELF dynamic object (aka an .so + // file). Since we know the image size and oat data size and code size we + // can prepare the ELF headers and we then know the ELF memory segment + // layout and we can now resolve all references. The compiler provides + // LinkerPatch information in each CompiledMethod and we resolve these, + // using the layout information and image object locations provided by + // image writer, as we're writing the method code. + // + // 4. We create the image file. It needs to know where the oat file // will be loaded after itself. Originally when oat file was simply // memory mapped so we could predict where its contents were based // on the file size. Now that it is an ELF file, we need to inspect // the ELF file to understand the in memory segment layout including - // where the oat header is located within. ElfPatcher's Patch method - // uses the PatchInformation from the Compiler to touch up absolute - // references in the oat file. + // where the oat header is located within. + // TODO: We could just remember this information from step 3. // - // 3. We fixup the ELF program headers so that dlopen will try to + // 5. We fixup the ELF program headers so that dlopen will try to // load the .so at the desired location at runtime by offsetting the // Elf32_Phdr.p_vaddr values by the desired base address. + // TODO: Do this in step 3. We already know the layout there. + // + // Steps 1.-3. are done by the CreateOatFile() above, steps 4.-5. + // are done by the CreateImageFile() below. 
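For orientation, the numbered steps above map onto the following call sequence in main(), condensed from the hunks in this patch (a recap with error handling elided, not additional behavior):

  dex2oat->Compile(boot_image_option, dex_files, bitcode_filename, image,
                   image_classes, dump_stats, dump_passes,
                   &timings, &compiler_phases_timings, profile_file);
  if (image) {
    dex2oat->PrepareImageWriter(image_base);   // constructs the ImageWriter used in steps 2 and 4
  }
  if (!dex2oat->CreateOatFile(dex_files, android_root, is_host, oat_file.get(),
                              oat_location, &timings, key_value_store.get())) {
    return EXIT_FAILURE;                       // steps 1-3: oat/image layout, ELF write
  }
  if (image &&
      !dex2oat->CreateImageFile(image_filename, oat_unstripped, oat_location)) {
    return EXIT_FAILURE;                       // steps 4-5: image write, ELF fixup
  }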
// if (image) { TimingLogger::ScopedTiming t("dex2oat ImageWriter", &timings); bool image_creation_success = dex2oat->CreateImageFile(image_filename, - image_base, oat_unstripped, - oat_location, - *compiler.get()); + oat_location); if (!image_creation_success) { return EXIT_FAILURE; } @@ -1511,7 +1522,7 @@ static int dex2oat(int argc, char** argv) { LOG(INFO) << Dumpable<TimingLogger>(timings); } if (dump_passes) { - LOG(INFO) << Dumpable<CumulativeLogger>(*compiler.get()->GetTimingsLogger()); + LOG(INFO) << Dumpable<CumulativeLogger>(compiler_phases_timings); } return EXIT_SUCCESS; } diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc index 54e77612a3..6f8e08b3ba 100644 --- a/disassembler/disassembler_arm.cc +++ b/disassembler/disassembler_arm.cc @@ -82,14 +82,14 @@ void DisassemblerArm::DumpCond(std::ostream& os, uint32_t cond) { void DisassemblerArm::DumpMemoryDomain(std::ostream& os, uint32_t domain) { switch (domain) { - case 0b1111: os << "sy"; break; - case 0b1110: os << "st"; break; - case 0b1011: os << "ish"; break; - case 0b1010: os << "ishst"; break; - case 0b0111: os << "nsh"; break; - case 0b0110: os << "nshst"; break; - case 0b0011: os << "osh"; break; - case 0b0010: os << "oshst"; break; + case 15U /* 0b1111 */: os << "sy"; break; + case 14U /* 0b1110 */: os << "st"; break; + case 11U /* 0b1011 */: os << "ish"; break; + case 10U /* 0b1010 */: os << "ishst"; break; + case 7U /* 0b0111 */: os << "nsh"; break; + case 6U /* 0b0110 */: os << "nshst"; break; + case 3U /* 0b0011 */: os << "osh"; break; + case 2U /* 0b0010 */: os << "oshst"; break; } } @@ -269,7 +269,7 @@ void DisassemblerArm::DumpArm(std::ostream& os, const uint8_t* instr_ptr) { uint32_t op = (instruction >> 21) & 0xf; opcode = kDataProcessingOperations[op]; bool implicit_s = ((op & ~3) == 8); // TST, TEQ, CMP, and CMN. - bool is_mov = op == 0b1101 || op == 0b1111; + bool is_mov = op == 13U /* 0b1101 */ || op == 15U /* 0b1111 */; if (is_mov) { // Show only Rd and Rm. if (s) { diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index 1d29765d4e..195c45f79d 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -58,10 +58,10 @@ static const char* gReg64Names[] = { }; // 64-bit opcode REX modifier. -constexpr uint8_t REX_W = 0b1000; -constexpr uint8_t REX_R = 0b0100; -constexpr uint8_t REX_X = 0b0010; -constexpr uint8_t REX_B = 0b0001; +constexpr uint8_t REX_W = 8U /* 0b1000 */; +constexpr uint8_t REX_R = 4U /* 0b0100 */; +constexpr uint8_t REX_X = 2U /* 0b0010 */; +constexpr uint8_t REX_B = 1U /* 0b0001 */; static void DumpReg0(std::ostream& os, uint8_t rex, size_t reg, bool byte_operand, uint8_t size_override) { @@ -767,7 +767,7 @@ DISASSEMBLER_ENTRY(cmp, case 0xB1: opcode << "cmpxchg"; has_modrm = true; store = true; break; case 0xB6: opcode << "movzxb"; has_modrm = true; load = true; byte_second_operand = true; break; case 0xB7: opcode << "movzxw"; has_modrm = true; load = true; break; - case 0xBE: opcode << "movsxb"; has_modrm = true; load = true; byte_second_operand = true; rex |= (rex == 0 ? 0 : 0b1000); break; + case 0xBE: opcode << "movsxb"; has_modrm = true; load = true; byte_second_operand = true; rex |= (rex == 0 ? 
0 : REX_W); break; case 0xBF: opcode << "movsxw"; has_modrm = true; load = true; break; case 0xC3: opcode << "movnti"; store = true; has_modrm = true; break; case 0xC5: diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index 888f2d2524..d5e766f69d 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -315,6 +315,11 @@ class OatSymbolizer : public CodeOutput { } } + // Set oat data offset. Required by ElfBuilder/CodeOutput. + void SetCodeOffset(size_t offset) { + // Nothing to do. + } + // Write oat code. Required by ElfBuilder/CodeOutput. bool Write(OutputStream* out) { return out->WriteFully(oat_file_->Begin(), oat_file_->End() - oat_file_->Begin()); diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h index 326333eb7d..9086d58fc7 100644 --- a/patchoat/patchoat.h +++ b/patchoat/patchoat.h @@ -36,7 +36,7 @@ class Object; class Reference; class Class; class ArtMethod; -}; // namespace mirror +} // namespace mirror class PatchOat { public: diff --git a/runtime/Android.mk b/runtime/Android.mk index 46b2e10497..e9544761bd 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -79,7 +79,6 @@ LIBART_COMMON_SRC_FILES := \ intern_table.cc \ interpreter/interpreter.cc \ interpreter/interpreter_common.cc \ - interpreter/interpreter_goto_table_impl.cc \ interpreter/interpreter_switch_impl.cc \ java_vm_ext.cc \ jdwp/jdwp_event.cc \ @@ -201,6 +200,10 @@ LIBART_COMMON_SRC_FILES += \ entrypoints/quick/quick_throw_entrypoints.cc \ entrypoints/quick/quick_trampoline_entrypoints.cc +# Source files that only compile with GCC. +LIBART_GCC_ONLY_SRC_FILES := \ + interpreter/interpreter_goto_table_impl.cc + LIBART_TARGET_LDFLAGS := LIBART_HOST_LDFLAGS := @@ -419,7 +422,7 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT include external/libcxx/libcxx.mk LOCAL_SHARED_LIBRARIES += libbacktrace_libc++ ifeq ($$(art_target_or_host),target) - LOCAL_SHARED_LIBRARIES += libcutils libdl libselinux libutils libsigchain + LOCAL_SHARED_LIBRARIES += libcutils libdl libutils libsigchain LOCAL_STATIC_LIBRARIES := libziparchive libz else # host LOCAL_STATIC_LIBRARIES += libcutils libziparchive-host libz libutils diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index 38a88c574c..2780d1b5c9 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -256,6 +256,6 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method; qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception; qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow; -}; +} } // namespace art diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc index ee005e8f66..17310b6d95 100644 --- a/runtime/arch/x86/fault_handler_x86.cc +++ b/runtime/arch/x86/fault_handler_x86.cc @@ -104,11 +104,17 @@ static uint32_t GetInstructionSize(const uint8_t* pc) { bool two_byte = false; uint32_t displacement_size = 0; uint32_t immediate_size = 0; + bool operand_size_prefix = false; // Prefixes. 
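As a concrete illustration of the decoder change below (the byte sequences are illustrative, not emitted anywhere in this patch): the 0x66 operand-size prefix shrinks the immediate of the group-1 opcode 0x81 from four bytes to two, which is exactly what the new cmpw() emitters produce and what GetInstructionSize() must now skip correctly.

  #include <cstdint>

  // cmpw word ptr [eax], 0x1234  ->  66 81 /7 iw
  static const uint8_t kCmpwMemImm[] = {0x66, 0x81, 0x38, 0x34, 0x12};
  // cmpl dword ptr [eax], 0x1234 ->  81 /7 id
  static const uint8_t kCmplMemImm[] = {0x81, 0x38, 0x34, 0x12, 0x00, 0x00};
  // With the prefix tracked, GetInstructionSize(kCmpwMemImm) should report 5
  // (1 prefix + 1 opcode + 1 ModRM + 2 immediate); without the fix it would
  // skip 4 immediate bytes and overshoot the faulting instruction.
  // GetInstructionSize(kCmplMemImm) still reports 6.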
while (true) { bool prefix_present = false; switch (opcode) { + // Group 3 + case 0x66: + operand_size_prefix = true; + // fallthrough + // Group 1 case 0xf0: case 0xf2: @@ -122,9 +128,6 @@ static uint32_t GetInstructionSize(const uint8_t* pc) { case 0x64: case 0x65: - // Group 3 - case 0x66: - // Group 4 case 0x67: opcode = *pc++; @@ -189,7 +192,7 @@ static uint32_t GetInstructionSize(const uint8_t* pc) { case 0x81: // group 1, word immediate. modrm = *pc++; has_modrm = true; - immediate_size = 4; + immediate_size = operand_size_prefix ? 2 : 4; break; default: @@ -204,18 +207,18 @@ static uint32_t GetInstructionSize(const uint8_t* pc) { } if (has_modrm) { - uint8_t mod = (modrm >> 6) & 0b11; + uint8_t mod = (modrm >> 6) & 3U /* 0b11 */; // Check for SIB. - if (mod != 0b11 && (modrm & 0b111) == 4) { + if (mod != 3U /* 0b11 */ && (modrm & 7U /* 0b111 */) == 4) { ++pc; // SIB } switch (mod) { - case 0b00: break; - case 0b01: displacement_size = 1; break; - case 0b10: displacement_size = 4; break; - case 0b11: + case 0U /* 0b00 */: break; + case 1U /* 0b01 */: displacement_size = 1; break; + case 2U /* 0b10 */: displacement_size = 4; break; + case 3U /* 0b11 */: break; } } diff --git a/runtime/base/logging.h b/runtime/base/logging.h index caeb946ff0..cf3e763f1e 100644 --- a/runtime/base/logging.h +++ b/runtime/base/logging.h @@ -151,7 +151,7 @@ namespace art { template <typename LHS, typename RHS> struct EagerEvaluator { - EagerEvaluator(LHS lhs, RHS rhs) : lhs(lhs), rhs(rhs) { } + EagerEvaluator(LHS l, RHS r) : lhs(l), rhs(r) { } LHS lhs; RHS rhs; }; @@ -163,9 +163,9 @@ struct EagerEvaluator { // protect you against combinations not explicitly listed below. #define EAGER_PTR_EVALUATOR(T1, T2) \ template <> struct EagerEvaluator<T1, T2> { \ - EagerEvaluator(T1 lhs, T2 rhs) \ - : lhs(reinterpret_cast<const void*>(lhs)), \ - rhs(reinterpret_cast<const void*>(rhs)) { } \ + EagerEvaluator(T1 l, T2 r) \ + : lhs(reinterpret_cast<const void*>(l)), \ + rhs(reinterpret_cast<const void*>(r)) { } \ const void* lhs; \ const void* rhs; \ } diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc index 2c95eded08..4383a7c43f 100644 --- a/runtime/base/mutex.cc +++ b/runtime/base/mutex.cc @@ -37,6 +37,7 @@ ReaderWriterMutex* Locks::breakpoint_lock_ = nullptr; ReaderWriterMutex* Locks::classlinker_classes_lock_ = nullptr; Mutex* Locks::deoptimization_lock_ = nullptr; ReaderWriterMutex* Locks::heap_bitmap_lock_ = nullptr; +Mutex* Locks::instrument_entrypoints_lock_ = nullptr; Mutex* Locks::intern_table_lock_ = nullptr; Mutex* Locks::jni_libraries_lock_ = nullptr; Mutex* Locks::logging_lock_ = nullptr; @@ -876,6 +877,10 @@ void Locks::Init() { } \ current_lock_level = new_level; + UPDATE_CURRENT_LOCK_LEVEL(kInstrumentEntrypointsLock); + DCHECK(instrument_entrypoints_lock_ == nullptr); + instrument_entrypoints_lock_ = new Mutex("instrument entrypoint lock", current_lock_level); + UPDATE_CURRENT_LOCK_LEVEL(kMutatorLock); DCHECK(mutator_lock_ == nullptr); mutator_lock_ = new ReaderWriterMutex("mutator lock", current_lock_level); diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index 8d2cdce802..516fa07b66 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -100,6 +100,7 @@ enum LockLevel { kTraceLock, kHeapBitmapLock, kMutatorLock, + kInstrumentEntrypointsLock, kThreadListSuspendThreadLock, kZygoteCreationLock, @@ -491,6 +492,9 @@ class Locks { // potential deadlock cycle. static Mutex* thread_list_suspend_thread_lock_; + // Guards allocation entrypoint instrumenting. 
+ static Mutex* instrument_entrypoints_lock_ ACQUIRED_AFTER(thread_list_suspend_thread_lock_); + // The mutator_lock_ is used to allow mutators to execute in a shared (reader) mode or to block // mutators by having an exclusive (writer) owner. In normal execution each mutator thread holds // a share on the mutator_lock_. The garbage collector may also execute with shared access but @@ -549,7 +553,7 @@ class Locks { // else | .. running .. // Goto x | .. running .. // .. running .. | .. running .. - static ReaderWriterMutex* mutator_lock_ ACQUIRED_AFTER(thread_list_suspend_thread_lock_); + static ReaderWriterMutex* mutator_lock_ ACQUIRED_AFTER(instrument_entrypoints_lock_); // Allow reader-writer mutual exclusion on the mark and live bitmaps of the heap. static ReaderWriterMutex* heap_bitmap_lock_ ACQUIRED_AFTER(mutator_lock_); diff --git a/runtime/base/stringpiece.cc b/runtime/base/stringpiece.cc index 47140e3247..824ee4863d 100644 --- a/runtime/base/stringpiece.cc +++ b/runtime/base/stringpiece.cc @@ -19,26 +19,34 @@ #include <iostream> #include <utility> +#include "logging.h" + namespace art { +#if !defined(NDEBUG) +char StringPiece::operator[](size_type i) const { + CHECK_LT(i, length_); + return ptr_[i]; +} +#endif + void StringPiece::CopyToString(std::string* target) const { target->assign(ptr_, length_); } -int StringPiece::copy(char* buf, size_type n, size_type pos) const { - int ret = std::min(length_ - pos, n); +StringPiece::size_type StringPiece::copy(char* buf, size_type n, size_type pos) const { + size_type ret = std::min(length_ - pos, n); memcpy(buf, ptr_ + pos, ret); return ret; } StringPiece::size_type StringPiece::find(const StringPiece& s, size_type pos) const { - if (length_ < 0 || pos > static_cast<size_type>(length_)) + if (length_ == 0 || pos > static_cast<size_type>(length_)) { return npos; - - const char* result = std::search(ptr_ + pos, ptr_ + length_, - s.ptr_, s.ptr_ + s.length_); + } + const char* result = std::search(ptr_ + pos, ptr_ + length_, s.ptr_, s.ptr_ + s.length_); const size_type xpos = result - ptr_; - return xpos + s.length_ <= static_cast<size_type>(length_) ? xpos : npos; + return xpos + s.length_ <= length_ ? xpos : npos; } int StringPiece::compare(const StringPiece& x) const { @@ -51,7 +59,7 @@ int StringPiece::compare(const StringPiece& x) const { } StringPiece::size_type StringPiece::find(char c, size_type pos) const { - if (length_ <= 0 || pos >= static_cast<size_type>(length_)) { + if (length_ == 0 || pos >= length_) { return npos; } const char* result = std::find(ptr_ + pos, ptr_ + length_, c); @@ -69,7 +77,7 @@ StringPiece::size_type StringPiece::rfind(const StringPiece& s, size_type pos) c } StringPiece::size_type StringPiece::rfind(char c, size_type pos) const { - if (length_ <= 0) return npos; + if (length_ == 0) return npos; for (int i = std::min(pos, static_cast<size_type>(length_ - 1)); i >= 0; --i) { if (ptr_[i] == c) { @@ -85,8 +93,6 @@ StringPiece StringPiece::substr(size_type pos, size_type n) const { return StringPiece(ptr_ + pos, n); } -const StringPiece::size_type StringPiece::npos = size_type(-1); - std::ostream& operator<<(std::ostream& o, const StringPiece& piece) { o.write(piece.data(), piece.size()); return o; diff --git a/runtime/base/stringpiece.h b/runtime/base/stringpiece.h index 2dde245a8d..b8de308057 100644 --- a/runtime/base/stringpiece.h +++ b/runtime/base/stringpiece.h @@ -14,81 +14,90 @@ * limitations under the License. */ -// A string-like object that points to a sized piece of memory. 
-// -// Functions or methods may use const StringPiece& parameters to accept either -// a "const char*" or a "string" value that will be implicitly converted to -// a StringPiece. The implicit conversion means that it is often appropriate -// to include this .h file in other files rather than forward-declaring -// StringPiece as would be appropriate for most other Google classes. -// -// Systematic usage of StringPiece is encouraged as it will reduce unnecessary -// conversions from "const char*" to "string" and back again. - #ifndef ART_RUNTIME_BASE_STRINGPIECE_H_ #define ART_RUNTIME_BASE_STRINGPIECE_H_ #include <string.h> -#include <algorithm> -#include <cstddef> -#include <iosfwd> #include <string> namespace art { +// A string-like object that points to a sized piece of memory. +// +// Functions or methods may use const StringPiece& parameters to accept either +// a "const char*" or a "string" value that will be implicitly converted to +// a StringPiece. The implicit conversion means that it is often appropriate +// to include this .h file in other files rather than forward-declaring +// StringPiece as would be appropriate for most other Google classes. class StringPiece { - private: - const char* ptr_; - int length_; - public: + // standard STL container boilerplate + typedef char value_type; + typedef const char* pointer; + typedef const char& reference; + typedef const char& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + static constexpr size_type npos = size_type(-1); + typedef const char* const_iterator; + typedef const char* iterator; + typedef std::reverse_iterator<const_iterator> const_reverse_iterator; + typedef std::reverse_iterator<iterator> reverse_iterator; + // We provide non-explicit singleton constructors so users can pass // in a "const char*" or a "string" wherever a "StringPiece" is // expected. - StringPiece() : ptr_(NULL), length_(0) { } - StringPiece(const char* str) // NOLINT - : ptr_(str), length_((str == NULL) ? 0 : static_cast<int>(strlen(str))) { } - StringPiece(const std::string& str) // NOLINT - : ptr_(str.data()), length_(static_cast<int>(str.size())) { } - StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { } + StringPiece() : ptr_(nullptr), length_(0) { } + StringPiece(const char* str) // NOLINT implicit constructor desired + : ptr_(str), length_((str == nullptr) ? 0 : strlen(str)) { } + StringPiece(const std::string& str) // NOLINT implicit constructor desired + : ptr_(str.data()), length_(str.size()) { } + StringPiece(const char* offset, size_t len) : ptr_(offset), length_(len) { } // data() may return a pointer to a buffer with embedded NULs, and the // returned buffer may or may not be null terminated. Therefore it is // typically a mistake to pass data() to a routine that expects a NUL // terminated string. 
const char* data() const { return ptr_; } - int size() const { return length_; } - int length() const { return length_; } + size_type size() const { return length_; } + size_type length() const { return length_; } bool empty() const { return length_ == 0; } void clear() { - ptr_ = NULL; + ptr_ = nullptr; length_ = 0; } - void set(const char* data, int len) { + void set(const char* data, size_type len) { ptr_ = data; length_ = len; } void set(const char* str) { ptr_ = str; - if (str != NULL) - length_ = static_cast<int>(strlen(str)); - else + if (str != nullptr) { + length_ = strlen(str); + } else { length_ = 0; + } } - void set(const void* data, int len) { + void set(const void* data, size_type len) { ptr_ = reinterpret_cast<const char*>(data); length_ = len; } - char operator[](int i) const { return ptr_[i]; } +#if defined(NDEBUG) + char operator[](size_type i) const { + return ptr_[i]; + } +#else + char operator[](size_type i) const; +#endif - void remove_prefix(int n) { + void remove_prefix(size_type n) { ptr_ += n; length_ -= n; } - void remove_suffix(int n) { + void remove_suffix(size_type n) { length_ -= n; } @@ -121,18 +130,6 @@ class StringPiece { (memcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0)); } - // standard STL container boilerplate - typedef char value_type; - typedef const char* pointer; - typedef const char& reference; - typedef const char& const_reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - static const size_type npos; - typedef const char* const_iterator; - typedef const char* iterator; - typedef std::reverse_iterator<const_iterator> const_reverse_iterator; - typedef std::reverse_iterator<iterator> reverse_iterator; iterator begin() const { return ptr_; } iterator end() const { return ptr_ + length_; } const_reverse_iterator rbegin() const { @@ -141,11 +138,8 @@ class StringPiece { const_reverse_iterator rend() const { return const_reverse_iterator(ptr_); } - // STLS says return size_type, but Google says return int - int max_size() const { return length_; } - int capacity() const { return length_; } - int copy(char* buf, size_type n, size_type pos = 0) const; + size_type copy(char* buf, size_type n, size_type pos = 0) const; size_type find(const StringPiece& s, size_type pos = 0) const; size_type find(char c, size_type pos = 0) const; @@ -153,13 +147,19 @@ class StringPiece { size_type rfind(char c, size_type pos = npos) const; StringPiece substr(size_type pos, size_type n = npos) const; + + private: + // Pointer to char data, not necessarily zero terminated. + const char* ptr_; + // Length of data. + size_type length_; }; // This large function is defined inline so that in a fairly common case where // one of the arguments is a literal, the compiler can elide a lot of the // following comparisons. 
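A short usage sketch of the reworked class (an illustrative example using only what the header declares, not code from the patch): sizes and positions are now size_type, so callers compare against npos instead of relying on the old signed length.

  #include <string>
  #include "base/stringpiece.h"  // the header shown in this hunk

  void StringPieceExample() {
    art::StringPiece path("/system/framework/boot.oat");
    art::StringPiece::size_type dot = path.find('.');  // size_type, not int
    if (dot != art::StringPiece::npos) {
      art::StringPiece ext = path.substr(dot + 1);      // "oat"
      std::string copy;
      ext.CopyToString(&copy);                          // copy == "oat"
    }
  }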
inline bool operator==(const StringPiece& x, const StringPiece& y) { - int len = x.size(); + StringPiece::size_type len = x.size(); if (len != y.size()) { return false; } @@ -169,7 +169,7 @@ inline bool operator==(const StringPiece& x, const StringPiece& y) { if (p1 == p2) { return true; } - if (len <= 0) { + if (len == 0) { return true; } diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc index 95223d856c..bfe44a28bc 100644 --- a/runtime/check_jni.cc +++ b/runtime/check_jni.cc @@ -1914,7 +1914,7 @@ class CheckJNI { } static void CallStaticVoidMethodA(JNIEnv* env, jclass c, jmethodID mid, jvalue* vargs) { - CallMethodA(__FUNCTION__, env, c, nullptr, mid, vargs, Primitive::kPrimVoid, kStatic); + CallMethodA(__FUNCTION__, env, nullptr, c, mid, vargs, Primitive::kPrimVoid, kStatic); } static void CallVoidMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list vargs) { diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index f927720ce4..cc77c508a5 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -314,6 +314,7 @@ void ClassLinker::InitWithoutImage(const std::vector<const DexFile*>& boot_class java_lang_Class->AssertReadBarrierPointer(); } java_lang_Class->SetClassSize(mirror::Class::ClassClassSize()); + java_lang_Class->SetPrimitiveType(Primitive::kPrimNot); heap->DecrementDisableMovingGC(self); // AllocClass(mirror::Class*) can now be used @@ -338,6 +339,12 @@ void ClassLinker::InitWithoutImage(const std::vector<const DexFile*>& boot_class // Setup the char (primitive) class to be used for char[]. Handle<mirror::Class> char_class(hs.NewHandle( AllocClass(self, java_lang_Class.Get(), mirror::Class::PrimitiveClassSize()))); + // The primitive char class won't be initialized by + // InitializePrimitiveClass until line 459, but strings (and + // internal char arrays) will be allocated before that and the + // component size, which is computed from the primitive type, needs + // to be set here. + char_class->SetPrimitiveType(Primitive::kPrimChar); // Setup the char[] class to be used for String. 
Handle<mirror::Class> char_array_class(hs.NewHandle( @@ -2109,7 +2116,7 @@ mirror::Class* ClassLinker::FindClassInPathClassLoader(ScopedObjectAccessAlready hs.NewHandle(soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie)); Handle<mirror::ArtField> dex_file_field = hs.NewHandle( - soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList$Element_dexFile)); + soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile)); mirror::Object* dex_path_list = soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList)-> GetObject(class_loader.Get()); diff --git a/runtime/class_linker.h b/runtime/class_linker.h index 3ea74e01ac..111dd6369e 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -673,7 +673,7 @@ class ClassLinker { std::vector<const DexFile*> boot_class_path_; mutable ReaderWriterMutex dex_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - std::vector<size_t> new_dex_cache_roots_ GUARDED_BY(dex_lock_);; + std::vector<size_t> new_dex_cache_roots_ GUARDED_BY(dex_lock_); std::vector<GcRoot<mirror::DexCache>> dex_caches_ GUARDED_BY(dex_lock_); std::vector<const OatFile*> oat_files_ GUARDED_BY(dex_lock_); diff --git a/runtime/debugger.cc b/runtime/debugger.cc index a9c4b4a514..96b44bfdf7 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -1422,7 +1422,7 @@ JDWP::JdwpError Dbg::CreateArrayObject(JDWP::RefTypeId array_class_id, uint32_t return error; } *new_array = gRegistry->Add(mirror::Array::Alloc<true>(Thread::Current(), c, length, - c->GetComponentSize(), + c->GetComponentSizeShift(), Runtime::Current()->GetHeap()->GetCurrentAllocator())); return JDWP::ERR_NONE; } @@ -4453,7 +4453,7 @@ void Dbg::SetAllocTrackingEnabled(bool enable) { recent_allocation_records_ = new AllocRecord[alloc_record_max_]; CHECK(recent_allocation_records_ != nullptr); } - Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints(false); + Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints(); } else { { ScopedObjectAccess soa(self); // For type_cache_.Clear(); @@ -4469,7 +4469,7 @@ void Dbg::SetAllocTrackingEnabled(bool enable) { type_cache_.Clear(); } // If an allocation comes in before we uninstrument, we will safely drop it on the floor. - Runtime::Current()->GetInstrumentation()->UninstrumentQuickAllocEntryPoints(false); + Runtime::Current()->GetInstrumentation()->UninstrumentQuickAllocEntryPoints(); } } diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h index 4ef7d741f7..9fb9a3ba56 100644 --- a/runtime/entrypoints/entrypoint_utils-inl.h +++ b/runtime/entrypoints/entrypoint_utils-inl.h @@ -230,11 +230,11 @@ static inline mirror::Array* AllocArrayFromCode(uint32_t type_idx, } gc::Heap* heap = Runtime::Current()->GetHeap(); return mirror::Array::Alloc<kInstrumented>(self, klass, component_count, - klass->GetComponentSize(), + klass->GetComponentSizeShift(), heap->GetCurrentAllocator()); } return mirror::Array::Alloc<kInstrumented>(self, klass, component_count, - klass->GetComponentSize(), allocator_type); + klass->GetComponentSizeShift(), allocator_type); } template <bool kAccessCheck, bool kInstrumented> @@ -259,7 +259,7 @@ static inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass, // No need to retry a slow-path allocation as the above code won't cause a GC or thread // suspension. 
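To make the new argument concrete (a trivial arithmetic sketch, not part of the patch): Array::Alloc now receives the component size shift rather than the size, and the element size it implies is simply 1 << shift.

  #include <cstddef>

  // component size == 1 << component_size_shift
  constexpr size_t ComponentSize(size_t shift) { return static_cast<size_t>(1) << shift; }

  static_assert(ComponentSize(0) == 1, "byte/boolean elements");
  static_assert(ComponentSize(1) == 2, "char/short elements");
  static_assert(ComponentSize(2) == 4, "int/float elements");
  static_assert(ComponentSize(3) == 8, "long/double elements");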
return mirror::Array::Alloc<kInstrumented>(self, klass, component_count, - klass->GetComponentSize(), allocator_type); + klass->GetComponentSizeShift(), allocator_type); } template<FindFieldType type, bool access_check> diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc index a78c2c005b..835d6e2b7e 100644 --- a/runtime/entrypoints/entrypoint_utils.cc +++ b/runtime/entrypoints/entrypoint_utils.cc @@ -90,7 +90,8 @@ mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* gc::Heap* heap = Runtime::Current()->GetHeap(); // Use the current allocator type in case CheckFilledNewArrayAlloc caused us to suspend and then // the heap switched the allocator type while we were suspended. - return mirror::Array::Alloc<false>(self, klass, component_count, klass->GetComponentSize(), + return mirror::Array::Alloc<false>(self, klass, component_count, + klass->GetComponentSizeShift(), heap->GetCurrentAllocator()); } @@ -109,7 +110,8 @@ mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, gc::Heap* heap = Runtime::Current()->GetHeap(); // Use the current allocator type in case CheckFilledNewArrayAlloc caused us to suspend and then // the heap switched the allocator type while we were suspended. - return mirror::Array::Alloc<true>(self, klass, component_count, klass->GetComponentSize(), + return mirror::Array::Alloc<true>(self, klass, component_count, + klass->GetComponentSizeShift(), heap->GetCurrentAllocator()); } diff --git a/runtime/entrypoints/portable/portable_thread_entrypoints.cc b/runtime/entrypoints/portable/portable_thread_entrypoints.cc index 7d5ccc2256..ecbc65ee55 100644 --- a/runtime/entrypoints/portable/portable_thread_entrypoints.cc +++ b/runtime/entrypoints/portable/portable_thread_entrypoints.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "mirror/art_method-inl.h" #include "verifier/dex_gc_map.h" #include "stack.h" #include "thread-inl.h" diff --git a/runtime/entrypoints/quick/callee_save_frame.h b/runtime/entrypoints/quick/callee_save_frame.h index e573d6da85..e728f7dd11 100644 --- a/runtime/entrypoints/quick/callee_save_frame.h +++ b/runtime/entrypoints/quick/callee_save_frame.h @@ -18,7 +18,9 @@ #define ART_RUNTIME_ENTRYPOINTS_QUICK_CALLEE_SAVE_FRAME_H_ #include "base/mutex.h" +#include "gc_root-inl.h" #include "instruction_set.h" +#include "runtime-inl.h" #include "thread-inl.h" // Specific frame size code is in architecture-specific files. We include this to compile-time diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc index 9d850c55bf..d8da463981 100644 --- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc @@ -216,10 +216,10 @@ void SetQuickAllocEntryPoints##suffix(QuickEntryPoints* qpoints, bool instrument // Generate the entrypoint functions. 
#if !defined(__APPLE__) || !defined(__LP64__) -GENERATE_ENTRYPOINTS(_dlmalloc); -GENERATE_ENTRYPOINTS(_rosalloc); -GENERATE_ENTRYPOINTS(_bump_pointer); -GENERATE_ENTRYPOINTS(_tlab); +GENERATE_ENTRYPOINTS(_dlmalloc) +GENERATE_ENTRYPOINTS(_rosalloc) +GENERATE_ENTRYPOINTS(_bump_pointer) +GENERATE_ENTRYPOINTS(_tlab) #endif static bool entry_points_instrumented = false; diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.h b/runtime/entrypoints/quick/quick_alloc_entrypoints.h index 7fd3fe9040..ec0aef57a7 100644 --- a/runtime/entrypoints/quick/quick_alloc_entrypoints.h +++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.h @@ -17,15 +17,12 @@ #ifndef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ALLOC_ENTRYPOINTS_H_ #define ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ALLOC_ENTRYPOINTS_H_ -#include "gc/heap.h" +#include "base/mutex.h" +#include "gc/allocator_type.h" #include "quick_entrypoints.h" namespace art { -namespace gc { -enum AllocatorType; -} // namespace gc - void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints); // Runtime shutdown lock is necessary to prevent races in thread initialization. When the thread is diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc index 87f04bbbf2..c2395352bc 100644 --- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "entrypoints/entrypoint_utils-inl.h" #include "mirror/object-inl.h" #include "thread-inl.h" #include "verify_object-inl.h" diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index c97144990d..3101c68599 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -277,7 +277,7 @@ inline Heap::AllocationTimer::~AllocationTimer() { heap_->total_allocation_time_.FetchAndAddSequentiallyConsistent(allocation_end_time - allocation_start_time_); } } -}; +} inline bool Heap::ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const { // We need to have a zygote space or else our newly allocated large object can end up in the diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 864bb7251f..d672510b1c 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -259,7 +259,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max non_moving_space_capacity, PROT_READ | PROT_WRITE, true, &error_str)); CHECK(non_moving_space_mem_map != nullptr) << error_str; // Try to reserve virtual memory at a lower address if we have a separate non moving space. - request_begin = reinterpret_cast<byte*>(0x1000000); + request_begin = reinterpret_cast<byte*>(300 * MB); } // Attempt to create 2 mem maps at or after the requested begin. 
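A quick arithmetic note on the request_begin change above (not part of the patch; MB here is the mebibyte constant, 1024 * 1024): the preferred non-moving-space address moves from the old 16 MiB literal up to 300 MiB.

  static_assert(300u * 1024u * 1024u == 0x12C00000u, "300 * MB");
  static_assert(0x1000000u == 16u * 1024u * 1024u, "the old literal was 16 MiB");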
main_mem_map_1.reset(MapAnonymousPreferredAddress(kMemMapSpaceName[0], request_begin, capacity_, @@ -426,7 +426,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max } } if (running_on_valgrind_) { - Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints(false); + Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints(); } if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) { LOG(INFO) << "Heap() exiting"; diff --git a/runtime/indirect_reference_table-inl.h b/runtime/indirect_reference_table-inl.h index 9ee6d897ab..7e770f64a9 100644 --- a/runtime/indirect_reference_table-inl.h +++ b/runtime/indirect_reference_table-inl.h @@ -47,7 +47,7 @@ inline bool IndirectReferenceTable::GetChecked(IndirectRef iref) const { AbortIfNoCheckJNI(); return false; } - if (UNLIKELY(table_[idx].IsNull())) { + if (UNLIKELY(table_[idx].GetReference()->IsNull())) { LOG(ERROR) << "JNI ERROR (app bug): accessed deleted " << kind_ << " " << iref; AbortIfNoCheckJNI(); return false; @@ -77,7 +77,7 @@ inline mirror::Object* IndirectReferenceTable::Get(IndirectRef iref) const { return nullptr; } uint32_t idx = ExtractIndex(iref); - mirror::Object* obj = table_[idx].Read<kReadBarrierOption>(); + mirror::Object* obj = table_[idx].GetReference()->Read<kWithoutReadBarrier>(); VerifyObject(obj); return obj; } diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc index 2278408a52..c1455fd0bc 100644 --- a/runtime/indirect_reference_table.cc +++ b/runtime/indirect_reference_table.cc @@ -64,34 +64,22 @@ void IndirectReferenceTable::AbortIfNoCheckJNI() { } IndirectReferenceTable::IndirectReferenceTable(size_t initialCount, - size_t maxCount, IndirectRefKind desiredKind) { + size_t maxCount, IndirectRefKind desiredKind) + : kind_(desiredKind), + max_entries_(maxCount) { CHECK_GT(initialCount, 0U); CHECK_LE(initialCount, maxCount); CHECK_NE(desiredKind, kHandleScopeOrInvalid); std::string error_str; - const size_t initial_bytes = initialCount * sizeof(const mirror::Object*); - const size_t table_bytes = maxCount * sizeof(const mirror::Object*); + const size_t table_bytes = maxCount * sizeof(IrtEntry); table_mem_map_.reset(MemMap::MapAnonymous("indirect ref table", nullptr, table_bytes, PROT_READ | PROT_WRITE, false, &error_str)); CHECK(table_mem_map_.get() != nullptr) << error_str; CHECK_EQ(table_mem_map_->Size(), table_bytes); - - table_ = reinterpret_cast<GcRoot<mirror::Object>*>(table_mem_map_->Begin()); + table_ = reinterpret_cast<IrtEntry*>(table_mem_map_->Begin()); CHECK(table_ != nullptr); - memset(table_, 0xd1, initial_bytes); - - const size_t slot_bytes = maxCount * sizeof(IndirectRefSlot); - slot_mem_map_.reset(MemMap::MapAnonymous("indirect ref table slots", nullptr, slot_bytes, - PROT_READ | PROT_WRITE, false, &error_str)); - CHECK(slot_mem_map_.get() != nullptr) << error_str; - slot_data_ = reinterpret_cast<IndirectRefSlot*>(slot_mem_map_->Begin()); - CHECK(slot_data_ != nullptr); - segment_state_.all = IRT_FIRST_SEGMENT; - alloc_entries_ = initialCount; - max_entries_ = maxCount; - kind_ = desiredKind; } IndirectReferenceTable::~IndirectReferenceTable() { @@ -105,24 +93,12 @@ IndirectRef IndirectReferenceTable::Add(uint32_t cookie, mirror::Object* obj) { CHECK(obj != NULL); VerifyObject(obj); DCHECK(table_ != NULL); - DCHECK_LE(alloc_entries_, max_entries_); DCHECK_GE(segment_state_.parts.numHoles, prevState.parts.numHoles); - if (topIndex == alloc_entries_) { - // reached end of allocated space; did we hit buffer max? 
- if (topIndex == max_entries_) { - LOG(FATAL) << "JNI ERROR (app bug): " << kind_ << " table overflow " - << "(max=" << max_entries_ << ")\n" - << MutatorLockedDumpable<IndirectReferenceTable>(*this); - } - - size_t newSize = alloc_entries_ * 2; - if (newSize > max_entries_) { - newSize = max_entries_; - } - DCHECK_GT(newSize, alloc_entries_); - - alloc_entries_ = newSize; + if (topIndex == max_entries_) { + LOG(FATAL) << "JNI ERROR (app bug): " << kind_ << " table overflow " + << "(max=" << max_entries_ << ")\n" + << MutatorLockedDumpable<IndirectReferenceTable>(*this); } // We know there's enough room in the table. Now we just need to find @@ -130,27 +106,26 @@ IndirectRef IndirectReferenceTable::Add(uint32_t cookie, mirror::Object* obj) { // add to the end of the list. IndirectRef result; int numHoles = segment_state_.parts.numHoles - prevState.parts.numHoles; + size_t index; if (numHoles > 0) { DCHECK_GT(topIndex, 1U); // Find the first hole; likely to be near the end of the list. - GcRoot<mirror::Object>* pScan = &table_[topIndex - 1]; - DCHECK(!pScan->IsNull()); + IrtEntry* pScan = &table_[topIndex - 1]; + DCHECK(!pScan->GetReference()->IsNull()); --pScan; - while (!pScan->IsNull()) { + while (!pScan->GetReference()->IsNull()) { DCHECK_GE(pScan, table_ + prevState.parts.topIndex); --pScan; } - UpdateSlotAdd(obj, pScan - table_); - result = ToIndirectRef(pScan - table_); - *pScan = GcRoot<mirror::Object>(obj); + index = pScan - table_; segment_state_.parts.numHoles--; } else { // Add to the end. - UpdateSlotAdd(obj, topIndex); - result = ToIndirectRef(topIndex); - table_[topIndex++] = GcRoot<mirror::Object>(obj); + index = topIndex++; segment_state_.parts.topIndex = topIndex; } + table_[index].Add(obj); + result = ToIndirectRef(index); if (false) { LOG(INFO) << "+++ added at " << ExtractIndex(result) << " top=" << segment_state_.parts.topIndex << " holes=" << segment_state_.parts.numHoles; @@ -162,7 +137,7 @@ IndirectRef IndirectReferenceTable::Add(uint32_t cookie, mirror::Object* obj) { void IndirectReferenceTable::AssertEmpty() { for (size_t i = 0; i < Capacity(); ++i) { - if (!table_[i].IsNull()) { + if (!table_[i].GetReference()->IsNull()) { ScopedObjectAccess soa(Thread::Current()); LOG(FATAL) << "Internal Error: non-empty local reference table\n" << MutatorLockedDumpable<IndirectReferenceTable>(*this); @@ -185,7 +160,6 @@ bool IndirectReferenceTable::Remove(uint32_t cookie, IndirectRef iref) { int bottomIndex = prevState.parts.topIndex; DCHECK(table_ != NULL); - DCHECK_LE(alloc_entries_, max_entries_); DCHECK_GE(segment_state_.parts.numHoles, prevState.parts.numHoles); int idx = ExtractIndex(iref); @@ -195,7 +169,6 @@ bool IndirectReferenceTable::Remove(uint32_t cookie, IndirectRef iref) { LOG(WARNING) << "Attempt to remove local handle scope entry from IRT, ignoring"; return true; } - if (idx < bottomIndex) { // Wrong segment. LOG(WARNING) << "Attempt to remove index outside index area (" << idx @@ -209,23 +182,23 @@ bool IndirectReferenceTable::Remove(uint32_t cookie, IndirectRef iref) { return false; } - if (idx == topIndex-1) { + if (idx == topIndex - 1) { // Top-most entry. Scan up and consume holes. 
if (!CheckEntry("remove", iref, idx)) { return false; } - table_[idx] = GcRoot<mirror::Object>(nullptr); + *table_[idx].GetReference() = GcRoot<mirror::Object>(nullptr); int numHoles = segment_state_.parts.numHoles - prevState.parts.numHoles; if (numHoles != 0) { while (--topIndex > bottomIndex && numHoles != 0) { if (false) { - LOG(INFO) << "+++ checking for hole at " << topIndex-1 + LOG(INFO) << "+++ checking for hole at " << topIndex - 1 << " (cookie=" << cookie << ") val=" - << table_[topIndex - 1].Read<kWithoutReadBarrier>(); + << table_[topIndex - 1].GetReference()->Read<kWithoutReadBarrier>(); } - if (!table_[topIndex-1].IsNull()) { + if (!table_[topIndex - 1].GetReference()->IsNull()) { break; } if (false) { @@ -245,7 +218,7 @@ bool IndirectReferenceTable::Remove(uint32_t cookie, IndirectRef iref) { // Not the top-most entry. This creates a hole. We NULL out the // entry to prevent somebody from deleting it twice and screwing up // the hole count. - if (table_[idx].IsNull()) { + if (table_[idx].GetReference()->IsNull()) { LOG(INFO) << "--- WEIRD: removing null entry " << idx; return false; } @@ -253,7 +226,7 @@ bool IndirectReferenceTable::Remove(uint32_t cookie, IndirectRef iref) { return false; } - table_[idx] = GcRoot<mirror::Object>(nullptr); + *table_[idx].GetReference() = GcRoot<mirror::Object>(nullptr); segment_state_.parts.numHoles++; if (false) { LOG(INFO) << "+++ left hole at " << idx << ", holes=" << segment_state_.parts.numHoles; @@ -280,11 +253,11 @@ void IndirectReferenceTable::Dump(std::ostream& os) const { os << kind_ << " table dump:\n"; ReferenceTable::Table entries; for (size_t i = 0; i < Capacity(); ++i) { - mirror::Object* obj = table_[i].Read<kWithoutReadBarrier>(); + mirror::Object* obj = table_[i].GetReference()->Read<kWithoutReadBarrier>(); if (UNLIKELY(obj == nullptr)) { // Remove NULLs. } else { - obj = table_[i].Read(); + obj = table_[i].GetReference()->Read(); entries.push_back(GcRoot<mirror::Object>(obj)); } } diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h index 5291e508a9..168f9f2764 100644 --- a/runtime/indirect_reference_table.h +++ b/runtime/indirect_reference_table.h @@ -125,16 +125,6 @@ static inline IndirectRefKind GetIndirectRefKind(IndirectRef iref) { return static_cast<IndirectRefKind>(reinterpret_cast<uintptr_t>(iref) & 0x03); } -/* - * Extended debugging structure. We keep a parallel array of these, one - * per slot in the table. - */ -static const size_t kIRTPrevCount = 4; -struct IndirectRefSlot { - uint32_t serial; - const mirror::Object* previous[kIRTPrevCount]; -}; - /* use as initial value for "cookie", and when table has only one segment */ static const uint32_t IRT_FIRST_SEGMENT = 0; @@ -201,9 +191,35 @@ union IRTSegmentState { } parts; }; +// Try to choose kIRTPrevCount so that sizeof(IrtEntry) is a power of 2. +// Contains multiple entries but only one active one, this helps us detect use after free errors +// since the serial stored in the indirect ref wont match. +static const size_t kIRTPrevCount = kIsDebugBuild ? 
7 : 3; +class PACKED(4) IrtEntry { + public: + void Add(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + ++serial_; + if (serial_ == kIRTPrevCount) { + serial_ = 0; + } + references_[serial_] = GcRoot<mirror::Object>(obj); + } + GcRoot<mirror::Object>* GetReference() { + DCHECK_LT(serial_, kIRTPrevCount); + return &references_[serial_]; + } + uint32_t GetSerial() const { + return serial_; + } + + private: + uint32_t serial_; + GcRoot<mirror::Object> references_[kIRTPrevCount]; +}; + class IrtIterator { public: - explicit IrtIterator(GcRoot<mirror::Object>* table, size_t i, size_t capacity) + explicit IrtIterator(IrtEntry* table, size_t i, size_t capacity) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) : table_(table), i_(i), capacity_(capacity) { } @@ -215,7 +231,7 @@ class IrtIterator { mirror::Object** operator*() { // This does not have a read barrier as this is used to visit roots. - return table_[i_].AddressWithoutBarrier(); + return table_[i_].GetReference()->AddressWithoutBarrier(); } bool equals(const IrtIterator& rhs) const { @@ -223,7 +239,7 @@ class IrtIterator { } private: - GcRoot<mirror::Object>* const table_; + IrtEntry* const table_; size_t i_; const size_t capacity_; }; @@ -316,9 +332,7 @@ class IndirectReferenceTable { } private: - /* - * Extract the table index from an indirect reference. - */ + // Extract the table index from an indirect reference. static uint32_t ExtractIndex(IndirectRef iref) { uintptr_t uref = reinterpret_cast<uintptr_t>(iref); return (uref >> 2) & 0xffff; @@ -330,25 +344,11 @@ class IndirectReferenceTable { */ IndirectRef ToIndirectRef(uint32_t tableIndex) const { DCHECK_LT(tableIndex, 65536U); - uint32_t serialChunk = slot_data_[tableIndex].serial; - uintptr_t uref = serialChunk << 20 | (tableIndex << 2) | kind_; + uint32_t serialChunk = table_[tableIndex].GetSerial(); + uintptr_t uref = (serialChunk << 20) | (tableIndex << 2) | kind_; return reinterpret_cast<IndirectRef>(uref); } - /* - * Update extended debug info when an entry is added. - * - * We advance the serial number, invalidating any outstanding references to - * this slot. - */ - void UpdateSlotAdd(const mirror::Object* obj, int slot) { - if (slot_data_ != NULL) { - IndirectRefSlot* pSlot = &slot_data_[slot]; - pSlot->serial++; - pSlot->previous[pSlot->serial % kIRTPrevCount] = obj; - } - } - // Abort if check_jni is not enabled. static void AbortIfNoCheckJNI(); @@ -361,19 +361,13 @@ class IndirectReferenceTable { // Mem map where we store the indirect refs. std::unique_ptr<MemMap> table_mem_map_; - // Mem map where we store the extended debugging info. - std::unique_ptr<MemMap> slot_mem_map_; // bottom of the stack. Do not directly access the object references // in this as they are roots. Use Get() that has a read barrier. 
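A small stand-alone illustration of the packing that ToIndirectRef() performs and why the rotating serial matters (the helpers below are hypothetical and only mirror the bit layout shown above; the kind value 1 is just an example):

  #include <cstdint>

  // serial in bits 20 and up, table index in bits 2..17, reference kind in bits 0..1
  constexpr uintptr_t PackIref(uint32_t serial, uint32_t index, uint32_t kind) {
    return (static_cast<uintptr_t>(serial) << 20) | (index << 2) | kind;
  }
  constexpr uint32_t ExtractIrefIndex(uintptr_t uref) { return (uref >> 2) & 0xffff; }

  // If the entry at index 5 is removed and the slot reused, Add() bumps the
  // serial, so a reference handed out before the reuse decodes to the same index
  // but a different serial; per the comment above, that mismatch is how a
  // use-after-free of an indirect reference can be detected.
  static_assert(ExtractIrefIndex(PackIref(1, 5, 1)) == ExtractIrefIndex(PackIref(2, 5, 1)), "same slot");
  static_assert(PackIref(1, 5, 1) != PackIref(2, 5, 1), "different serial, different iref");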
- GcRoot<mirror::Object>* table_; + IrtEntry* table_; /* bit mask, ORed into all irefs */ - IndirectRefKind kind_; - /* extended debugging info */ - IndirectRefSlot* slot_data_; - /* #of entries we have space for */ - size_t alloc_entries_; + const IndirectRefKind kind_; /* max #of entries allowed */ - size_t max_entries_; + const size_t max_entries_; }; } // namespace art diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h index ae8eeac54d..de6d0f47d9 100644 --- a/runtime/instruction_set.h +++ b/runtime/instruction_set.h @@ -56,6 +56,7 @@ static constexpr InstructionSet kRuntimeISA = kNone; static constexpr size_t kArmPointerSize = 4; static constexpr size_t kArm64PointerSize = 8; static constexpr size_t kMipsPointerSize = 4; +static constexpr size_t kMips64PointerSize = 8; static constexpr size_t kX86PointerSize = 4; static constexpr size_t kX86_64PointerSize = 8; @@ -93,6 +94,8 @@ static inline size_t GetInstructionSetPointerSize(InstructionSet isa) { return kX86_64PointerSize; case kMips: return kMipsPointerSize; + case kMips64: + return kMips64PointerSize; case kNone: LOG(FATAL) << "ISA kNone does not have pointer size."; return 0; @@ -114,6 +117,7 @@ static inline bool Is64BitInstructionSet(InstructionSet isa) { case kArm64: case kX86_64: + case kMips64: return true; case kNone: diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc index a2e88a694e..15be6b752b 100644 --- a/runtime/instrumentation.cc +++ b/runtime/instrumentation.cc @@ -597,45 +597,52 @@ static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg) { thread->ResetQuickAllocEntryPointsForThread(); } -void Instrumentation::SetEntrypointsInstrumented(bool instrumented, bool suspended) { +void Instrumentation::SetEntrypointsInstrumented(bool instrumented) { + Thread* self = Thread::Current(); Runtime* runtime = Runtime::Current(); ThreadList* tl = runtime->GetThreadList(); - if (suspended) { - Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current()); - } - if (runtime->IsStarted() && !suspended) { + Locks::mutator_lock_->AssertNotHeld(self); + Locks::instrument_entrypoints_lock_->AssertHeld(self); + if (runtime->IsStarted()) { tl->SuspendAll(); } { - MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_); + MutexLock mu(self, *Locks::runtime_shutdown_lock_); SetQuickAllocEntryPointsInstrumented(instrumented); ResetQuickAllocEntryPoints(); } - if (runtime->IsStarted() && !suspended) { + if (runtime->IsStarted()) { tl->ResumeAll(); } } -void Instrumentation::InstrumentQuickAllocEntryPoints(bool suspended) { - // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racey and this code - // should be guarded by a lock. 
- DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_.LoadSequentiallyConsistent(), 0); - const bool enable_instrumentation = - quick_alloc_entry_points_instrumentation_counter_.FetchAndAddSequentiallyConsistent(1) == 0; - if (enable_instrumentation) { - SetEntrypointsInstrumented(true, suspended); +void Instrumentation::InstrumentQuickAllocEntryPoints() { + MutexLock mu(Thread::Current(), *Locks::instrument_entrypoints_lock_); + InstrumentQuickAllocEntryPointsLocked(); +} + +void Instrumentation::UninstrumentQuickAllocEntryPoints() { + MutexLock mu(Thread::Current(), *Locks::instrument_entrypoints_lock_); + UninstrumentQuickAllocEntryPointsLocked(); +} + +void Instrumentation::InstrumentQuickAllocEntryPointsLocked() { + Locks::instrument_entrypoints_lock_->AssertHeld(Thread::Current()); + if (quick_alloc_entry_points_instrumentation_counter_ == 0) { + SetEntrypointsInstrumented(true); } + ++quick_alloc_entry_points_instrumentation_counter_; + LOG(INFO) << "Counter: " << quick_alloc_entry_points_instrumentation_counter_; } -void Instrumentation::UninstrumentQuickAllocEntryPoints(bool suspended) { - // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racey and this code - // should be guarded by a lock. - DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_.LoadSequentiallyConsistent(), 0); - const bool disable_instrumentation = - quick_alloc_entry_points_instrumentation_counter_.FetchAndSubSequentiallyConsistent(1) == 1; - if (disable_instrumentation) { - SetEntrypointsInstrumented(false, suspended); +void Instrumentation::UninstrumentQuickAllocEntryPointsLocked() { + Locks::instrument_entrypoints_lock_->AssertHeld(Thread::Current()); + CHECK_GT(quick_alloc_entry_points_instrumentation_counter_, 0U); + --quick_alloc_entry_points_instrumentation_counter_; + if (quick_alloc_entry_points_instrumentation_counter_ == 0) { + SetEntrypointsInstrumented(false); } + LOG(INFO) << "Counter: " << quick_alloc_entry_points_instrumentation_counter_; } void Instrumentation::ResetQuickAllocEntryPoints() { diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h index 3c1c756992..3017bf6a38 100644 --- a/runtime/instrumentation.h +++ b/runtime/instrumentation.h @@ -182,9 +182,13 @@ class Instrumentation { return interpreter_handler_table_; } - void InstrumentQuickAllocEntryPoints(bool suspended) + void InstrumentQuickAllocEntryPoints() LOCKS_EXCLUDED(Locks::instrument_entrypoints_lock_); + void UninstrumentQuickAllocEntryPoints() LOCKS_EXCLUDED(Locks::instrument_entrypoints_lock_); + void InstrumentQuickAllocEntryPointsLocked() + EXCLUSIVE_LOCKS_REQUIRED(Locks::instrument_entrypoints_lock_) LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::runtime_shutdown_lock_); - void UninstrumentQuickAllocEntryPoints(bool suspended) + void UninstrumentQuickAllocEntryPointsLocked() + EXCLUSIVE_LOCKS_REQUIRED(Locks::instrument_entrypoints_lock_) LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::runtime_shutdown_lock_); void ResetQuickAllocEntryPoints() EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_); @@ -350,7 +354,7 @@ class Instrumentation { // No thread safety analysis to get around SetQuickAllocEntryPointsInstrumented requiring // exclusive access to mutator lock which you can't get if the runtime isn't started. 
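The new refcounting discipline reduces to a simple pattern (a stand-alone sketch with hypothetical names, not the class itself): the counter is an ordinary size_t guarded by instrument_entrypoints_lock_, and only the 0 -> 1 and 1 -> 0 transitions pay for SetEntrypointsInstrumented(), which suspends all threads.

  #include <cstddef>

  // Hypothetical reduction of InstrumentQuickAllocEntryPointsLocked() /
  // UninstrumentQuickAllocEntryPointsLocked(); callers already hold the lock.
  struct AllocEntrypointCounter {
    size_t value = 0;

    bool Increment() {   // true => install the instrumented entrypoints now
      bool install = (value == 0);
      ++value;
      return install;
    }
    bool Decrement() {   // true => restore the regular entrypoints now
      --value;
      return value == 0;
    }
  };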
- void SetEntrypointsInstrumented(bool instrumented, bool suspended) NO_THREAD_SAFETY_ANALYSIS; + void SetEntrypointsInstrumented(bool instrumented) NO_THREAD_SAFETY_ANALYSIS; void MethodEnterEventImpl(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method, uint32_t dex_pc) const @@ -455,8 +459,8 @@ class Instrumentation { InterpreterHandlerTable interpreter_handler_table_ GUARDED_BY(Locks::mutator_lock_); // Greater than 0 if quick alloc entry points instrumented. - // TODO: The access and changes to this is racy and should be guarded by a lock. - AtomicInteger quick_alloc_entry_points_instrumentation_counter_; + size_t quick_alloc_entry_points_instrumentation_counter_ + GUARDED_BY(Locks::instrument_entrypoints_lock_); DISALLOW_COPY_AND_ASSIGN(Instrumentation); }; diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc index cd8cce06ae..07224efbfc 100644 --- a/runtime/interpreter/interpreter.cc +++ b/runtime/interpreter/interpreter.cc @@ -36,8 +36,8 @@ static void UnstartedRuntimeJni(Thread* self, ArtMethod* method, mirror::Class* array_class = runtime->GetClassLinker()->FindArrayClass(self, &element_class); DCHECK(array_class != nullptr); gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentAllocator(); - result->SetL(mirror::Array::Alloc<true>(self, array_class, length, - array_class->GetComponentSize(), allocator, true)); + result->SetL(mirror::Array::Alloc<true, true>(self, array_class, length, + array_class->GetComponentSizeShift(), allocator)); } else if (name == "java.lang.ClassLoader dalvik.system.VMStack.getCallingClassLoader()") { result->SetL(NULL); } else if (name == "java.lang.Class dalvik.system.VMStack.getStackClass2()") { @@ -316,7 +316,35 @@ enum InterpreterImplKind { kComputedGotoImplKind // Computed-goto-based interpreter implementation. }; +#if !defined(__clang__) static constexpr InterpreterImplKind kInterpreterImplKind = kComputedGotoImplKind; +#else +// Clang 3.4 fails to build the goto interpreter implementation. +static constexpr InterpreterImplKind kInterpreterImplKind = kSwitchImpl; +template<bool do_access_check, bool transaction_active> +JValue ExecuteGotoImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item, + ShadowFrame& shadow_frame, JValue result_register) { + LOG(FATAL) << "UNREACHABLE"; + exit(0); +} +// Explicit definitions of ExecuteGotoImpl. 
+template<> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) +JValue ExecuteGotoImpl<true, false>(Thread* self, MethodHelper& mh, + const DexFile::CodeItem* code_item, + ShadowFrame& shadow_frame, JValue result_register); +template<> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) +JValue ExecuteGotoImpl<false, false>(Thread* self, MethodHelper& mh, + const DexFile::CodeItem* code_item, + ShadowFrame& shadow_frame, JValue result_register); +template<> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) +JValue ExecuteGotoImpl<true, true>(Thread* self, MethodHelper& mh, + const DexFile::CodeItem* code_item, + ShadowFrame& shadow_frame, JValue result_register); +template<> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) +JValue ExecuteGotoImpl<false, true>(Thread* self, MethodHelper& mh, + const DexFile::CodeItem* code_item, + ShadowFrame& shadow_frame, JValue result_register); +#endif static JValue Execute(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame, JValue result_register) diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc index 9f0801351c..733f1d1394 100644 --- a/runtime/interpreter/interpreter_common.cc +++ b/runtime/interpreter/interpreter_common.cc @@ -96,22 +96,22 @@ bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, true); // iget-XXX -EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimBoolean); -EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimByte); -EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimChar); -EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimShort); -EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimInt); -EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimLong); -EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstanceObjectRead, Primitive::kPrimNot); +EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimBoolean) +EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimByte) +EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimChar) +EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimShort) +EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimInt) +EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimLong) +EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstanceObjectRead, Primitive::kPrimNot) // sget-XXX -EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimBoolean); -EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimByte); -EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimChar); -EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimShort); -EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimInt); -EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimLong); -EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticObjectRead, Primitive::kPrimNot); +EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimBoolean) +EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimByte) +EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimChar) 
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimShort) +EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimInt) +EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimLong) +EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticObjectRead, Primitive::kPrimNot) #undef EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL #undef EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL @@ -301,22 +301,22 @@ bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame, const Instruction EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, true, true); // iput-XXX -EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimBoolean); -EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimByte); -EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimChar); -EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimShort); -EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimInt); -EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimLong); -EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstanceObjectWrite, Primitive::kPrimNot); +EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimBoolean) +EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimByte) +EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimChar) +EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimShort) +EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimInt) +EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimLong) +EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstanceObjectWrite, Primitive::kPrimNot) // sput-XXX -EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimBoolean); -EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimByte); -EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimChar); -EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimShort); -EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimInt); -EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimLong); -EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticObjectWrite, Primitive::kPrimNot); +EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimBoolean) +EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimByte) +EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimChar) +EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimShort) +EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimInt) +EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimLong) +EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticObjectWrite, Primitive::kPrimNot) #undef EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL #undef EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL @@ -383,13 +383,13 @@ bool DoIPutQuick(const ShadowFrame& shadow_frame, const Instruction* inst, uint1 EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, false); \ EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, true); -EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimInt); // iput-quick. -EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimBoolean); // iput-boolean-quick. 
-EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimByte); // iput-byte-quick. -EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimChar); // iput-char-quick. -EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimShort); // iput-short-quick. -EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimLong); // iput-wide-quick. -EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimNot); // iput-object-quick. +EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimInt) // iput-quick. +EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimBoolean) // iput-boolean-quick. +EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimByte) // iput-byte-quick. +EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimChar) // iput-char-quick. +EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimShort) // iput-short-quick. +EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimLong) // iput-wide-quick. +EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimNot) // iput-object-quick. #undef EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL #undef EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL @@ -708,7 +708,8 @@ bool DoFilledNewArray(const Instruction* inst, const ShadowFrame& shadow_frame, } return false; } - Object* newArray = Array::Alloc<true>(self, arrayClass, length, arrayClass->GetComponentSize(), + Object* newArray = Array::Alloc<true>(self, arrayClass, length, + arrayClass->GetComponentSizeShift(), Runtime::Current()->GetHeap()->GetCurrentAllocator()); if (UNLIKELY(newArray == NULL)) { DCHECK(self->IsExceptionPending()); diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h index 9358632382..a8345ad579 100644 --- a/runtime/interpreter/interpreter_common.h +++ b/runtime/interpreter/interpreter_common.h @@ -388,11 +388,11 @@ static inline bool IsBackwardBranch(int32_t branch_offset) { EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, true, false); \ EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, true, true); -EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kStatic); // invoke-static/range. -EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kDirect); // invoke-direct/range. -EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kVirtual); // invoke-virtual/range. -EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kSuper); // invoke-super/range. -EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kInterface); // invoke-interface/range. +EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kStatic) // invoke-static/range. +EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kDirect) // invoke-direct/range. +EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kVirtual) // invoke-virtual/range. +EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kSuper) // invoke-super/range. +EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kInterface) // invoke-interface/range. 
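The semicolon removals in the EXPLICIT_*_TEMPLATE_DECL uses above are mechanical: the macro bodies shown in these hunks already end each expanded declaration with ';', so a use-site semicolon leaves an extra empty declaration at namespace scope, which pedantic compiler warnings flag. A tiny self-contained illustration (not ART code; names are made up):

    template <typename T> void Foo(T) {}

    // The macro's expansion already supplies the trailing ';'.
    #define EXPLICIT_FOO_DECL(T) template void Foo<T>(T);

    EXPLICIT_FOO_DECL(int)   // OK: no extra ';' needed at the use site.
    EXPLICIT_FOO_DECL(long)  // Writing "EXPLICIT_FOO_DECL(long);" would leave an
                             // empty declaration after the expansion.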
#undef EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL #undef EXPLICIT_DO_INVOKE_TEMPLATE_DECL diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc index 4d3f8c9a06..d755cb98a9 100644 --- a/runtime/mem_map.cc +++ b/runtime/mem_map.cc @@ -485,7 +485,7 @@ MemMap::MemMap(const std::string& name, byte* begin, size_t size, void* base_beg MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_); maps_.insert(std::pair<void*, MemMap*>(base_begin_, this)); } -}; +} MemMap* MemMap::RemapAtEnd(byte* new_end, const char* tail_name, int tail_prot, std::string* error_msg) { diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h index 213dbc20e9..6582226dd5 100644 --- a/runtime/mirror/array-inl.h +++ b/runtime/mirror/array-inl.h @@ -35,13 +35,13 @@ inline uint32_t Array::ClassSize() { template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption> inline size_t Array::SizeOf() { // This is safe from overflow because the array was already allocated, so we know it's sane. - size_t component_size = - GetClass<kVerifyFlags, kReadBarrierOption>()->template GetComponentSize<kReadBarrierOption>(); + size_t component_size_shift = GetClass<kVerifyFlags, kReadBarrierOption>()-> + template GetComponentSizeShift<kReadBarrierOption>(); // Don't need to check this since we already check this in GetClass. int32_t component_count = GetLength<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>(); - size_t header_size = DataOffset(component_size).SizeValue(); - size_t data_size = component_count * component_size; + size_t header_size = DataOffset(1U << component_size_shift).SizeValue(); + size_t data_size = component_count << component_size_shift; return header_size + data_size; } @@ -56,24 +56,36 @@ inline bool Array::CheckIsValidIndex(int32_t index) { } static inline size_t ComputeArraySize(Thread* self, Class* array_class, int32_t component_count, - size_t component_size) + size_t component_size_shift) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { DCHECK(array_class != NULL); DCHECK_GE(component_count, 0); DCHECK(array_class->IsArrayClass()); + size_t component_size = 1U << component_size_shift; size_t header_size = Array::DataOffset(component_size).SizeValue(); - size_t data_size = component_count * component_size; + size_t data_size = static_cast<size_t>(component_count) << component_size_shift; size_t size = header_size + data_size; - // Check for overflow and throw OutOfMemoryError if this was an unreasonable request. - size_t component_shift = sizeof(size_t) * 8 - 1 - CLZ(component_size); - if (UNLIKELY(data_size >> component_shift != size_t(component_count) || size < data_size)) { + // Check for size_t overflow and throw OutOfMemoryError if this was + // an unreasonable request. +#ifdef __LP64__ + // 64-bit. No overflow as component_count is 32-bit and the maximum + // component size is 8. + DCHECK_LE((1U << component_size_shift), 8U); +#else + // 32-bit. + DCHECK_NE(header_size, 0U); + DCHECK_EQ(RoundUp(header_size, component_size), header_size); + // The array length limit (exclusive). + const size_t length_limit = (0U - header_size) >> component_size_shift; + if (UNLIKELY(length_limit <= static_cast<size_t>(component_count))) { self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow", PrettyDescriptor(array_class).c_str(), component_count).c_str()); return 0; // failure } +#endif return size; } @@ -103,8 +115,10 @@ class SetLengthVisitor { // array. 
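The ComputeArraySize hunk above replaces the multiply with a shift and, on 32-bit targets, rejects lengths at or above the exclusive limit (0 - header_size) >> component_size_shift, which guarantees that header_size + (length << shift) cannot wrap. A standalone sketch of that check with illustrative names, using uint32_t to model the 32-bit case (header_size is assumed non-zero and component-aligned, as the patch DCHECKs):

    #include <cstdint>

    // Returns the total allocation size in bytes, or 0 on overflow (the
    // runtime likewise returns 0 and throws OutOfMemoryError in that case).
    uint32_t ComputeArrayBytes32(uint32_t header_size, uint32_t component_size_shift,
                                 int32_t component_count) {
      const uint32_t length_limit = (0u - header_size) >> component_size_shift;
      if (component_count < 0 || static_cast<uint32_t>(component_count) >= length_limit) {
        return 0;  // header_size + (count << shift) would exceed 32 bits.
      }
      const uint32_t data_size = static_cast<uint32_t>(component_count) << component_size_shift;
      return header_size + data_size;
    }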
class SetLengthToUsableSizeVisitor { public: - SetLengthToUsableSizeVisitor(int32_t min_length, size_t header_size, size_t component_size) : - minimum_length_(min_length), header_size_(header_size), component_size_(component_size) { + SetLengthToUsableSizeVisitor(int32_t min_length, size_t header_size, + size_t component_size_shift) : + minimum_length_(min_length), header_size_(header_size), + component_size_shift_(component_size_shift) { } void operator()(Object* obj, size_t usable_size) const @@ -112,10 +126,12 @@ class SetLengthToUsableSizeVisitor { // Avoid AsArray as object is not yet in live bitmap or allocation stack. Array* array = down_cast<Array*>(obj); // DCHECK(array->IsArrayInstance()); - int32_t length = (usable_size - header_size_) / component_size_; + int32_t length = (usable_size - header_size_) >> component_size_shift_; DCHECK_GE(length, minimum_length_); - byte* old_end = reinterpret_cast<byte*>(array->GetRawData(component_size_, minimum_length_)); - byte* new_end = reinterpret_cast<byte*>(array->GetRawData(component_size_, length)); + byte* old_end = reinterpret_cast<byte*>(array->GetRawData(1U << component_size_shift_, + minimum_length_)); + byte* new_end = reinterpret_cast<byte*>(array->GetRawData(1U << component_size_shift_, + length)); // Ensure space beyond original allocation is zeroed. memset(old_end, 0, new_end - old_end); array->SetLength(length); @@ -124,38 +140,46 @@ class SetLengthToUsableSizeVisitor { private: const int32_t minimum_length_; const size_t header_size_; - const size_t component_size_; + const size_t component_size_shift_; DISALLOW_COPY_AND_ASSIGN(SetLengthToUsableSizeVisitor); }; -template <bool kIsInstrumented> +template <bool kIsInstrumented, bool kFillUsable> inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count, - size_t component_size, gc::AllocatorType allocator_type, - bool fill_usable) { + size_t component_size_shift, gc::AllocatorType allocator_type) { DCHECK(allocator_type != gc::kAllocatorTypeLOS); - size_t size = ComputeArraySize(self, array_class, component_count, component_size); + DCHECK_EQ(array_class->GetComponentSizeShift(), component_size_shift); + DCHECK_EQ(array_class->GetComponentSize(), (1U << component_size_shift)); + size_t size = ComputeArraySize(self, array_class, component_count, component_size_shift); +#ifdef __LP64__ + // 64-bit. No size_t overflow. + DCHECK_NE(size, 0U); +#else + // 32-bit. if (UNLIKELY(size == 0)) { return nullptr; } +#endif gc::Heap* heap = Runtime::Current()->GetHeap(); Array* result; - if (!fill_usable) { + if (!kFillUsable) { SetLengthVisitor visitor(component_count); result = down_cast<Array*>( heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, array_class, size, allocator_type, visitor)); } else { - SetLengthToUsableSizeVisitor visitor(component_count, DataOffset(component_size).SizeValue(), - component_size); + SetLengthToUsableSizeVisitor visitor(component_count, + DataOffset(1U << component_size_shift).SizeValue(), + component_size_shift); result = down_cast<Array*>( heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, array_class, size, allocator_type, visitor)); } if (kIsDebugBuild && result != nullptr && Runtime::Current()->IsStarted()) { array_class = result->GetClass(); // In case the array class moved. 
- CHECK_EQ(array_class->GetComponentSize(), component_size); - if (!fill_usable) { + CHECK_EQ(array_class->GetComponentSize(), 1U << component_size_shift); + if (!kFillUsable) { CHECK_EQ(result->SizeOf(), size); } else { CHECK_GE(result->SizeOf(), size); @@ -173,7 +197,8 @@ inline void PrimitiveArray<T>::VisitRoots(RootCallback* callback, void* arg) { template<typename T> inline PrimitiveArray<T>* PrimitiveArray<T>::Alloc(Thread* self, size_t length) { - Array* raw_array = Array::Alloc<true>(self, GetArrayClass(), length, sizeof(T), + Array* raw_array = Array::Alloc<true>(self, GetArrayClass(), length, + ComponentSizeShiftWidth<sizeof(T)>(), Runtime::Current()->GetHeap()->GetCurrentAllocator()); return down_cast<PrimitiveArray<T>*>(raw_array); } diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc index 4535f6c437..636be3346a 100644 --- a/runtime/mirror/array.cc +++ b/runtime/mirror/array.cc @@ -48,7 +48,8 @@ static Array* RecursiveCreateMultiArray(Thread* self, StackHandleScope<1> hs(self); Handle<Array> new_array( hs.NewHandle( - Array::Alloc<true>(self, array_class.Get(), array_length, array_class->GetComponentSize(), + Array::Alloc<true>(self, array_class.Get(), array_length, + array_class->GetComponentSizeShift(), Runtime::Current()->GetHeap()->GetCurrentAllocator()))); if (UNLIKELY(new_array.Get() == nullptr)) { CHECK(self->IsExceptionPending()); diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h index 7af88d6d86..521d7e7fea 100644 --- a/runtime/mirror/array.h +++ b/runtime/mirror/array.h @@ -33,13 +33,12 @@ class MANAGED Array : public Object { // The size of a java.lang.Class representing an array. static uint32_t ClassSize(); - // Allocates an array with the given properties, if fill_usable is true the array will be of at + // Allocates an array with the given properties, if kFillUsable is true the array will be of at // least component_count size, however, if there's usable space at the end of the allocation the // array will fill it. - template <bool kIsInstrumented> - static Array* Alloc(Thread* self, Class* array_class, int32_t component_count, - size_t component_size, gc::AllocatorType allocator_type, - bool fill_usable = false) + template <bool kIsInstrumented, bool kFillUsable = false> + ALWAYS_INLINE static Array* Alloc(Thread* self, Class* array_class, int32_t component_count, + size_t component_size_shift, gc::AllocatorType allocator_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); static Array* CreateMultiArray(Thread* self, Handle<Class> element_class, @@ -66,12 +65,11 @@ class MANAGED Array : public Object { } static MemberOffset DataOffset(size_t component_size) { - if (component_size != sizeof(int64_t)) { - return OFFSET_OF_OBJECT_MEMBER(Array, first_element_); - } else { - // Align longs and doubles. 
- return MemberOffset(OFFSETOF_MEMBER(Array, first_element_) + 4); - } + DCHECK(IsPowerOfTwo(component_size)) << component_size; + size_t data_offset = RoundUp(OFFSETOF_MEMBER(Array, first_element_), component_size); + DCHECK_EQ(RoundUp(data_offset, component_size), data_offset) + << "Array data offset isn't aligned with component size"; + return MemberOffset(data_offset); } void* GetRawData(size_t component_size, int32_t index) diff --git a/runtime/mirror/art_field-inl.h b/runtime/mirror/art_field-inl.h index d37fa41d3e..03425cc300 100644 --- a/runtime/mirror/art_field-inl.h +++ b/runtime/mirror/art_field-inl.h @@ -25,6 +25,9 @@ #include "jvalue.h" #include "object-inl.h" #include "primitive.h" +#include "thread-inl.h" +#include "scoped_thread_state_change.h" +#include "well_known_classes.h" namespace art { namespace mirror { @@ -298,6 +301,14 @@ inline const DexFile* ArtField::GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutato return GetDexCache()->GetDexFile(); } +inline ArtField* ArtField::FromReflectedField(const ScopedObjectAccessAlreadyRunnable& soa, + jobject jlr_field) { + mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_reflect_Field_artField); + mirror::ArtField* field = f->GetObject(soa.Decode<mirror::Object*>(jlr_field))->AsArtField(); + DCHECK(field != nullptr); + return field; +} + } // namespace mirror } // namespace art diff --git a/runtime/mirror/art_field.cc b/runtime/mirror/art_field.cc index 3c7c6ce39a..7e20076112 100644 --- a/runtime/mirror/art_field.cc +++ b/runtime/mirror/art_field.cc @@ -31,14 +31,6 @@ namespace mirror { // TODO: Get global references for these GcRoot<Class> ArtField::java_lang_reflect_ArtField_; -ArtField* ArtField::FromReflectedField(const ScopedObjectAccessAlreadyRunnable& soa, - jobject jlr_field) { - mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_reflect_Field_artField); - mirror::ArtField* field = f->GetObject(soa.Decode<mirror::Object*>(jlr_field))->AsArtField(); - DCHECK(field != nullptr); - return field; -} - void ArtField::SetClass(Class* java_lang_reflect_ArtField) { CHECK(java_lang_reflect_ArtField_.IsNull()); CHECK(java_lang_reflect_ArtField != NULL); diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h index 885bcb06ad..50299b670a 100644 --- a/runtime/mirror/art_field.h +++ b/runtime/mirror/art_field.h @@ -47,8 +47,8 @@ class MANAGED ArtField FINAL : public Object { return sizeof(ArtField); } - static ArtField* FromReflectedField(const ScopedObjectAccessAlreadyRunnable& soa, - jobject jlr_field) + ALWAYS_INLINE static ArtField* FromReflectedField(const ScopedObjectAccessAlreadyRunnable& soa, + jobject jlr_field) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); Class* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc index 159d04df40..787c76715a 100644 --- a/runtime/mirror/art_method.cc +++ b/runtime/mirror/art_method.cc @@ -105,9 +105,9 @@ void ArtMethod::SetDexCacheResolvedTypes(ObjectArray<Class>* new_dex_cache_class } size_t ArtMethod::NumArgRegisters(const StringPiece& shorty) { - CHECK_LE(1, shorty.length()); + CHECK_LE(1U, shorty.length()); uint32_t num_registers = 0; - for (int i = 1; i < shorty.length(); ++i) { + for (size_t i = 1; i < shorty.length(); ++i) { char ch = shorty[i]; if (ch == 'D' || ch == 'J') { num_registers += 2; diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h index 3f67468148..3d3ae166a1 100644 --- a/runtime/mirror/class-inl.h +++ 
b/runtime/mirror/class-inl.h @@ -510,8 +510,19 @@ inline void Class::SetName(String* name) { template<VerifyObjectFlags kVerifyFlags> inline Primitive::Type Class::GetPrimitiveType() { DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t)); - return static_cast<Primitive::Type>( - GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_))); + int32_t v32 = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_)); + Primitive::Type type = static_cast<Primitive::Type>(v32 & 0xFFFF); + DCHECK_EQ(static_cast<size_t>(v32 >> 16), Primitive::ComponentSizeShift(type)); + return type; +} + +template<VerifyObjectFlags kVerifyFlags> +inline size_t Class::GetPrimitiveTypeSizeShift() { + DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t)); + int32_t v32 = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_)); + size_t size_shift = static_cast<Primitive::Type>(v32 >> 16); + DCHECK_EQ(size_shift, Primitive::ComponentSizeShift(static_cast<Primitive::Type>(v32 & 0xFFFF))); + return size_shift; } inline void Class::CheckObjectAlloc() { diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc index 0ee8fa88e7..3fcb188697 100644 --- a/runtime/mirror/class.cc +++ b/runtime/mirror/class.cc @@ -294,7 +294,8 @@ void Class::SetReferenceInstanceOffsets(uint32_t new_reference_offsets) { bool Class::IsInSamePackage(const StringPiece& descriptor1, const StringPiece& descriptor2) { size_t i = 0; - while (descriptor1[i] != '\0' && descriptor1[i] == descriptor2[i]) { + size_t min_length = std::min(descriptor1.size(), descriptor2.size()); + while (i < min_length && descriptor1[i] == descriptor2[i]) { ++i; } if (descriptor1.find('/', i) != StringPiece::npos || diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h index 4a8d6dc83b..0acf6952c2 100644 --- a/runtime/mirror/class.h +++ b/runtime/mirror/class.h @@ -345,9 +345,16 @@ class MANAGED Class FINAL : public Object { void SetPrimitiveType(Primitive::Type new_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t)); - SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), new_type); + int32_t v32 = static_cast<int32_t>(new_type); + DCHECK_EQ(v32 & 0xFFFF, v32) << "upper 16 bits aren't zero"; + // Store the component size shift in the upper 16 bits. + v32 |= Primitive::ComponentSizeShift(new_type) << 16; + SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), v32); } + template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> + size_t GetPrimitiveTypeSizeShift() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // Returns true if the class is a primitive type. 
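SetPrimitiveType above starts packing two values into the 32-bit primitive_type_ field: the Primitive::Type in the low 16 bits and its component size shift in the high 16 bits, so GetPrimitiveType and GetPrimitiveTypeSizeShift each need only one load plus a mask or shift. A standalone sketch of the packing (not ART code; the enum values and shifts are illustrative stand-ins):

    #include <cstdint>

    enum class PrimType : uint16_t { kNot = 0, kInt = 1, kLong = 2 };  // stand-ins

    constexpr uint32_t SizeShift(PrimType t) {
      return t == PrimType::kLong ? 3u : 2u;  // 8-byte vs. 4-byte components.
    }

    constexpr uint32_t Pack(PrimType t) {
      return static_cast<uint32_t>(t) | (SizeShift(t) << 16);  // low: type, high: shift.
    }

    constexpr PrimType UnpackType(uint32_t v)  { return static_cast<PrimType>(v & 0xFFFFu); }
    constexpr uint32_t UnpackShift(uint32_t v) { return v >> 16; }

    static_assert(UnpackType(Pack(PrimType::kLong)) == PrimType::kLong, "type round-trips");
    static_assert(UnpackShift(Pack(PrimType::kLong)) == 3u, "shift round-trips");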
template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> bool IsPrimitive() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -457,8 +464,12 @@ class MANAGED Class FINAL : public Object { template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier> size_t GetComponentSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return Primitive::ComponentSize( - GetComponentType<kDefaultVerifyFlags, kReadBarrierOption>()->GetPrimitiveType()); + return 1U << GetComponentSizeShift(); + } + + template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier> + size_t GetComponentSizeShift() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return GetComponentType<kDefaultVerifyFlags, kReadBarrierOption>()->GetPrimitiveTypeSizeShift(); } bool IsObjectClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -1149,8 +1160,9 @@ class MANAGED Class FINAL : public Object { // See also class_size_. uint32_t object_size_; - // Primitive type value, or Primitive::kPrimNot (0); set for generated primitive classes. - Primitive::Type primitive_type_; + // The lower 16 bits contains a Primitive::Type value. The upper 16 + // bits contains the size shift of the primitive type. + uint32_t primitive_type_; // Bitmap of offsets of ifields. uint32_t reference_instance_offsets_; diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h index c7540dcb7a..0ca44f8a3e 100644 --- a/runtime/mirror/object_array-inl.h +++ b/runtime/mirror/object_array-inl.h @@ -35,10 +35,13 @@ template<class T> inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self, Class* object_array_class, int32_t length, gc::AllocatorType allocator_type) { Array* array = Array::Alloc<true>(self, object_array_class, length, - sizeof(HeapReference<Object>), allocator_type); + ComponentSizeShiftWidth<sizeof(HeapReference<Object>)>(), + allocator_type); if (UNLIKELY(array == nullptr)) { return nullptr; } else { + DCHECK_EQ(array->GetClass()->GetComponentSizeShift(), + ComponentSizeShiftWidth<sizeof(HeapReference<Object>)>()); return array->AsObjectArray<T>(); } } diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc index 1290a3d388..7fa664d2dc 100644 --- a/runtime/mirror/object_test.cc +++ b/runtime/mirror/object_test.cc @@ -162,19 +162,19 @@ TEST_F(ObjectTest, AllocArray) { Class* c = class_linker_->FindSystemClass(soa.Self(), "[I"); StackHandleScope<1> hs(soa.Self()); MutableHandle<Array> a( - hs.NewHandle(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(), + hs.NewHandle(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSizeShift(), Runtime::Current()->GetHeap()->GetCurrentAllocator()))); EXPECT_TRUE(c == a->GetClass()); EXPECT_EQ(1, a->GetLength()); c = class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;"); - a.Assign(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(), + a.Assign(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSizeShift(), Runtime::Current()->GetHeap()->GetCurrentAllocator())); EXPECT_TRUE(c == a->GetClass()); EXPECT_EQ(1, a->GetLength()); c = class_linker_->FindSystemClass(soa.Self(), "[[Ljava/lang/Object;"); - a.Assign(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(), + a.Assign(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSizeShift(), Runtime::Current()->GetHeap()->GetCurrentAllocator())); EXPECT_TRUE(c == a->GetClass()); EXPECT_EQ(1, a->GetLength()); @@ -185,26 +185,26 @@ TEST_F(ObjectTest, AllocArray_FillUsable) { Class* c = class_linker_->FindSystemClass(soa.Self(), "[B"); StackHandleScope<1> hs(soa.Self()); 
MutableHandle<Array> a( - hs.NewHandle(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(), - Runtime::Current()->GetHeap()->GetCurrentAllocator(), true))); + hs.NewHandle(Array::Alloc<true, true>(soa.Self(), c, 1, c->GetComponentSizeShift(), + Runtime::Current()->GetHeap()->GetCurrentAllocator()))); EXPECT_TRUE(c == a->GetClass()); EXPECT_LE(1, a->GetLength()); c = class_linker_->FindSystemClass(soa.Self(), "[I"); - a.Assign(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(), - Runtime::Current()->GetHeap()->GetCurrentAllocator(), true)); + a.Assign(Array::Alloc<true, true>(soa.Self(), c, 2, c->GetComponentSizeShift(), + Runtime::Current()->GetHeap()->GetCurrentAllocator())); EXPECT_TRUE(c == a->GetClass()); EXPECT_LE(2, a->GetLength()); c = class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;"); - a.Assign(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(), - Runtime::Current()->GetHeap()->GetCurrentAllocator(), true)); + a.Assign(Array::Alloc<true, true>(soa.Self(), c, 2, c->GetComponentSizeShift(), + Runtime::Current()->GetHeap()->GetCurrentAllocator())); EXPECT_TRUE(c == a->GetClass()); EXPECT_LE(2, a->GetLength()); c = class_linker_->FindSystemClass(soa.Self(), "[[Ljava/lang/Object;"); - a.Assign(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(), - Runtime::Current()->GetHeap()->GetCurrentAllocator(), true)); + a.Assign(Array::Alloc<true, true>(soa.Self(), c, 2, c->GetComponentSizeShift(), + Runtime::Current()->GetHeap()->GetCurrentAllocator())); EXPECT_TRUE(c == a->GetClass()); EXPECT_LE(2, a->GetLength()); } diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index 003815e7b9..65a79198a4 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -292,7 +292,7 @@ static jbyte IsDexOptNeededForFile(const std::string& oat_filename, const char* std::unique_ptr<const OatFile> oat_file(OatFile::Open(oat_filename, oat_filename, nullptr, false, &error_msg)); if (oat_file.get() == nullptr) { - if (kVerboseLogging) { + if (kReasonLogging) { LOG(INFO) << "DexFile_isDexOptNeeded failed to open oat file '" << oat_filename << "' for file location '" << filename << "': " << error_msg; } @@ -319,13 +319,13 @@ static jbyte IsDexOptNeededForFile(const std::string& oat_filename, const char* return kUpToDate; } else if (should_relocate_if_possible && ClassLinker::VerifyOatImageChecksum(oat_file.get(), target_instruction_set)) { - if (kVerboseLogging) { + if (kReasonLogging) { LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename << " needs to be relocated for " << filename; } return kPatchoatNeeded; } else { - if (kVerboseLogging) { + if (kReasonLogging) { LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename << " is out of date for " << filename; } @@ -343,13 +343,13 @@ static jbyte IsDexOptNeededForFile(const std::string& oat_filename, const char* } else if (location_checksum == oat_dex_file->GetDexFileLocationChecksum() && should_relocate_if_possible && ClassLinker::VerifyOatImageChecksum(oat_file.get(), target_instruction_set)) { - if (kVerboseLogging) { + if (kReasonLogging) { LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename << " needs to be relocated for " << filename; } return kPatchoatNeeded; } else { - if (kVerboseLogging) { + if (kReasonLogging) { LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename << " is out of date for " << filename; } @@ -357,7 +357,7 @@ static jbyte IsDexOptNeededForFile(const std::string& oat_filename, const 
char* } } } else { - if (kVerboseLogging) { + if (kReasonLogging) { LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename << " does not contain " << filename; } @@ -367,9 +367,10 @@ static jbyte IsDexOptNeededForFile(const std::string& oat_filename, const char* static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename, const char* pkgname, const char* instruction_set, const jboolean defer) { - // TODO disable this logging. - const bool kVerboseLogging = false; // Spammy logging. - const bool kReasonLogging = true; // Logging of reason for returning JNI_TRUE. + // Spammy logging for kUpToDate + const bool kVerboseLogging = false; + // Logging of reason for returning kDexoptNeeded or kPatchoatNeeded. + const bool kReasonLogging = true; if ((filename == nullptr) || !OS::FileExists(filename)) { LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename << "' does not exist"; diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc index d8a537f948..ceff2065ba 100644 --- a/runtime/native/dalvik_system_VMDebug.cc +++ b/runtime/native/dalvik_system_VMDebug.cc @@ -60,11 +60,11 @@ static jobjectArray VMDebug_getVmFeatureList(JNIEnv* env, jclass) { } static void VMDebug_startAllocCounting(JNIEnv*, jclass) { - Runtime::Current()->SetStatsEnabled(true, false); + Runtime::Current()->SetStatsEnabled(true); } static void VMDebug_stopAllocCounting(JNIEnv*, jclass) { - Runtime::Current()->SetStatsEnabled(false, false); + Runtime::Current()->SetStatsEnabled(false); } static jint VMDebug_getAllocCount(JNIEnv*, jclass, jint kind) { diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc index e1d9fc769e..64d4fe2715 100644 --- a/runtime/native/dalvik_system_VMRuntime.cc +++ b/runtime/native/dalvik_system_VMRuntime.cc @@ -73,7 +73,8 @@ static jobject VMRuntime_newNonMovableArray(JNIEnv* env, jobject, jclass javaEle } gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentNonMovingAllocator(); mirror::Array* result = mirror::Array::Alloc<true>(soa.Self(), array_class, length, - array_class->GetComponentSize(), allocator); + array_class->GetComponentSizeShift(), + allocator); return soa.AddLocalReference<jobject>(result); } @@ -95,9 +96,9 @@ static jobject VMRuntime_newUnpaddedArray(JNIEnv* env, jobject, jclass javaEleme return nullptr; } gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentAllocator(); - mirror::Array* result = mirror::Array::Alloc<true>(soa.Self(), array_class, length, - array_class->GetComponentSize(), allocator, - true); + mirror::Array* result = mirror::Array::Alloc<true, true>(soa.Self(), array_class, length, + array_class->GetComponentSizeShift(), + allocator); return soa.AddLocalReference<jobject>(result); } diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc index c3c8c2576e..e469126206 100644 --- a/runtime/native/dalvik_system_ZygoteHooks.cc +++ b/runtime/native/dalvik_system_ZygoteHooks.cc @@ -111,15 +111,17 @@ static void ZygoteHooks_nativePostForkChild(JNIEnv* env, jclass, jlong token, ji thread->InitAfterFork(); EnableDebugFeatures(debug_flags); - Runtime::NativeBridgeAction action = Runtime::NativeBridgeAction::kUnload; if (instruction_set != nullptr) { ScopedUtfChars isa_string(env, instruction_set); InstructionSet isa = GetInstructionSetFromString(isa_string.c_str()); + Runtime::NativeBridgeAction action = Runtime::NativeBridgeAction::kUnload; if (isa != kNone && isa != kRuntimeISA) { action = 
Runtime::NativeBridgeAction::kInitialize; } + Runtime::Current()->DidForkFromZygote(env, action, isa_string.c_str()); + } else { + Runtime::Current()->DidForkFromZygote(env, Runtime::NativeBridgeAction::kUnload, nullptr); } - Runtime::Current()->DidForkFromZygote(action); } static JNINativeMethod gMethods[] = { diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc index 058458fa39..763a6645b9 100644 --- a/runtime/native/java_lang_reflect_Array.cc +++ b/runtime/native/java_lang_reflect_Array.cc @@ -59,9 +59,10 @@ static jobject Array_createObjectArray(JNIEnv* env, jclass, jclass javaElementCl return NULL; } DCHECK(array_class->IsObjectArrayClass()); - mirror::Array* new_array = mirror::Array::Alloc<true>(soa.Self(), array_class, length, - sizeof(mirror::HeapReference<mirror::Object>), - runtime->GetHeap()->GetCurrentAllocator()); + mirror::Array* new_array = mirror::Array::Alloc<true>( + soa.Self(), array_class, length, + ComponentSizeShiftWidth<sizeof(mirror::HeapReference<mirror::Object>)>(), + runtime->GetHeap()->GetCurrentAllocator()); return soa.AddLocalReference<jobject>(new_array); } diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc index ad88109a86..d166be030a 100644 --- a/runtime/native/java_lang_reflect_Field.cc +++ b/runtime/native/java_lang_reflect_Field.cc @@ -23,29 +23,43 @@ #include "mirror/art_field-inl.h" #include "mirror/art_method-inl.h" #include "mirror/class-inl.h" -#include "reflection.h" +#include "reflection-inl.h" #include "scoped_fast_native_object_access.h" namespace art { -static bool VerifyFieldAccess(mirror::ArtField* field, mirror::Object* obj, bool is_set) +template<bool kIsSet> +ALWAYS_INLINE inline static bool VerifyFieldAccess(Thread* self, mirror::ArtField* field, + mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - if (field->IsFinal() && is_set) { - ThrowIllegalAccessException(nullptr, StringPrintf("Cannot set final field: %s", - PrettyField(field).c_str()).c_str()); + if (kIsSet && field->IsFinal()) { + ThrowIllegalAccessException(nullptr, + StringPrintf("Cannot set %s field %s of class %s", + PrettyJavaAccessFlags(field->GetAccessFlags()).c_str(), + PrettyField(field).c_str(), + field->GetDeclaringClass() == nullptr ? "null" : + PrettyClass(field->GetDeclaringClass()).c_str()).c_str()); return false; } - if (!VerifyAccess(obj, field->GetDeclaringClass(), field->GetAccessFlags())) { - ThrowIllegalAccessException(nullptr, StringPrintf("Cannot access field: %s", - PrettyField(field).c_str()).c_str()); + mirror::Class* calling_class = nullptr; + if (!VerifyAccess(self, obj, field->GetDeclaringClass(), field->GetAccessFlags(), + &calling_class)) { + ThrowIllegalAccessException(nullptr, + StringPrintf("Class %s cannot access %s field %s of class %s", + calling_class == nullptr ? "null" : PrettyClass(calling_class).c_str(), + PrettyJavaAccessFlags(field->GetAccessFlags()).c_str(), + PrettyField(field).c_str(), + field->GetDeclaringClass() == nullptr ? 
"null" : + PrettyClass(field->GetDeclaringClass()).c_str()).c_str()); return false; } return true; } -static bool GetFieldValue(const ScopedFastNativeObjectAccess& soa, mirror::Object* o, - mirror::ArtField* f, Primitive::Type field_type, bool allow_references, - JValue* value) +template<bool kAllowReferences> +ALWAYS_INLINE inline static bool GetFieldValue( + const ScopedFastNativeObjectAccess& soa, mirror::Object* o, mirror::ArtField* f, + Primitive::Type field_type, JValue* value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { DCHECK_EQ(value->GetJ(), INT64_C(0)); switch (field_type) { @@ -74,7 +88,7 @@ static bool GetFieldValue(const ScopedFastNativeObjectAccess& soa, mirror::Objec value->SetS(f->GetShort(o)); return true; case Primitive::kPrimNot: - if (allow_references) { + if (kAllowReferences) { value->SetL(f->GetObject(o)); return true; } @@ -89,29 +103,29 @@ static bool GetFieldValue(const ScopedFastNativeObjectAccess& soa, mirror::Objec return false; } -static bool CheckReceiver(const ScopedFastNativeObjectAccess& soa, jobject j_rcvr, - mirror::ArtField** f, mirror::Object** class_or_rcvr) +ALWAYS_INLINE inline static bool CheckReceiver(const ScopedFastNativeObjectAccess& soa, + jobject j_rcvr, mirror::ArtField** f, + mirror::Object** class_or_rcvr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { soa.Self()->AssertThreadSuspensionIsAllowable(); + mirror::Class* declaringClass = (*f)->GetDeclaringClass(); if ((*f)->IsStatic()) { - StackHandleScope<2> hs(soa.Self()); - HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(f)); - Handle<mirror::Class> h_klass(hs.NewHandle((*f)->GetDeclaringClass())); - if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(soa.Self(), h_klass, true, - true))) { - DCHECK(soa.Self()->IsExceptionPending()); - *class_or_rcvr = nullptr; - return false; + if (UNLIKELY(!declaringClass->IsInitialized())) { + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + StackHandleScope<2> hs(soa.Self()); + HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(f)); + HandleWrapper<mirror::Class> h_klass(hs.NewHandleWrapper(&declaringClass)); + if (UNLIKELY(!class_linker->EnsureInitialized(soa.Self(), h_klass, true, true))) { + DCHECK(soa.Self()->IsExceptionPending()); + return false; + } } - *class_or_rcvr = h_klass.Get(); + *class_or_rcvr = declaringClass; return true; } - *class_or_rcvr = soa.Decode<mirror::Object*>(j_rcvr); - mirror::Class* declaringClass = (*f)->GetDeclaringClass(); if (!VerifyObjectIsClass(*class_or_rcvr, declaringClass)) { DCHECK(soa.Self()->IsExceptionPending()); - *class_or_rcvr = nullptr; return false; } return true; @@ -126,7 +140,7 @@ static jobject Field_get(JNIEnv* env, jobject javaField, jobject javaObj, jboole return nullptr; } // If field is not set to be accessible, verify it can be accessed by the caller. - if ((accessible == JNI_FALSE) && !VerifyFieldAccess(f, o, false)) { + if ((accessible == JNI_FALSE) && !VerifyFieldAccess<false>(soa.Self(), f, o)) { DCHECK(soa.Self()->IsExceptionPending()); return nullptr; } @@ -134,15 +148,16 @@ static jobject Field_get(JNIEnv* env, jobject javaField, jobject javaObj, jboole // Get the field's value, boxing if necessary. 
Primitive::Type field_type = f->GetTypeAsPrimitiveType(); JValue value; - if (!GetFieldValue(soa, o, f, field_type, true, &value)) { + if (!GetFieldValue<true>(soa, o, f, field_type, &value)) { DCHECK(soa.Self()->IsExceptionPending()); return nullptr; } return soa.AddLocalReference<jobject>(BoxPrimitive(field_type, value)); } -static JValue GetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj, - char dst_descriptor, jboolean accessible) { +template<Primitive::Type kPrimitiveType> +ALWAYS_INLINE inline static JValue GetPrimitiveField(JNIEnv* env, jobject javaField, + jobject javaObj, jboolean accessible) { ScopedFastNativeObjectAccess soa(env); mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField); mirror::Object* o = nullptr; @@ -152,7 +167,7 @@ static JValue GetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj, } // If field is not set to be accessible, verify it can be accessed by the caller. - if ((accessible == JNI_FALSE) && !VerifyFieldAccess(f, o, false)) { + if (accessible == JNI_FALSE && !VerifyFieldAccess<false>(soa.Self(), f, o)) { DCHECK(soa.Self()->IsExceptionPending()); return JValue(); } @@ -161,15 +176,22 @@ static JValue GetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj, // Read the value. Primitive::Type field_type = f->GetTypeAsPrimitiveType(); JValue field_value; - if (!GetFieldValue(soa, o, f, field_type, false, &field_value)) { + if (field_type == kPrimitiveType) { + // This if statement should get optimized out since we only pass in valid primitive types. + if (UNLIKELY(!GetFieldValue<false>(soa, o, f, kPrimitiveType, &field_value))) { + DCHECK(soa.Self()->IsExceptionPending()); + return JValue(); + } + return field_value; + } + if (!GetFieldValue<false>(soa, o, f, field_type, &field_value)) { DCHECK(soa.Self()->IsExceptionPending()); return JValue(); } - // Widen it if necessary (and possible). 
JValue wide_value; - if (!ConvertPrimitiveValue(NULL, false, field_type, Primitive::GetType(dst_descriptor), - field_value, &wide_value)) { + if (!ConvertPrimitiveValue(nullptr, false, field_type, kPrimitiveType, field_value, + &wide_value)) { DCHECK(soa.Self()->IsExceptionPending()); return JValue(); } @@ -178,36 +200,36 @@ static JValue GetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj, static jboolean Field_getBoolean(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) { - return GetPrimitiveField(env, javaField, javaObj, 'Z', accessible).GetZ(); + return GetPrimitiveField<Primitive::kPrimBoolean>(env, javaField, javaObj, accessible).GetZ(); } static jbyte Field_getByte(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) { - return GetPrimitiveField(env, javaField, javaObj, 'B', accessible).GetB(); + return GetPrimitiveField<Primitive::kPrimByte>(env, javaField, javaObj, accessible).GetB(); } static jchar Field_getChar(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) { - return GetPrimitiveField(env, javaField, javaObj, 'C', accessible).GetC(); + return GetPrimitiveField<Primitive::kPrimChar>(env, javaField, javaObj, accessible).GetC(); } static jdouble Field_getDouble(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) { - return GetPrimitiveField(env, javaField, javaObj, 'D', accessible).GetD(); + return GetPrimitiveField<Primitive::kPrimDouble>(env, javaField, javaObj, accessible).GetD(); } static jfloat Field_getFloat(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) { - return GetPrimitiveField(env, javaField, javaObj, 'F', accessible).GetF(); + return GetPrimitiveField<Primitive::kPrimFloat>(env, javaField, javaObj, accessible).GetF(); } static jint Field_getInt(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) { - return GetPrimitiveField(env, javaField, javaObj, 'I', accessible).GetI(); + return GetPrimitiveField<Primitive::kPrimInt>(env, javaField, javaObj, accessible).GetI(); } static jlong Field_getLong(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) { - return GetPrimitiveField(env, javaField, javaObj, 'J', accessible).GetJ(); + return GetPrimitiveField<Primitive::kPrimLong>(env, javaField, javaObj, accessible).GetJ(); } static jshort Field_getShort(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) { - return GetPrimitiveField(env, javaField, javaObj, 'S', accessible).GetS(); + return GetPrimitiveField<Primitive::kPrimShort>(env, javaField, javaObj, accessible).GetS(); } static void SetFieldValue(ScopedFastNativeObjectAccess& soa, mirror::Object* o, @@ -290,14 +312,15 @@ static void Field_set(JNIEnv* env, jobject javaField, jobject javaObj, jobject j return; } // If field is not set to be accessible, verify it can be accessed by the caller. 
- if ((accessible == JNI_FALSE) && !VerifyFieldAccess(f, o, true)) { + if ((accessible == JNI_FALSE) && !VerifyFieldAccess<true>(soa.Self(), f, o)) { DCHECK(soa.Self()->IsExceptionPending()); return; } SetFieldValue(soa, o, f, field_prim_type, true, unboxed_value); } -static void SetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj, char src_descriptor, +template<Primitive::Type kPrimitiveType> +static void SetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj, const JValue& new_value, jboolean accessible) { ScopedFastNativeObjectAccess soa(env); mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField); @@ -314,14 +337,13 @@ static void SetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj, c // Widen the value if necessary (and possible). JValue wide_value; - if (!ConvertPrimitiveValue(nullptr, false, Primitive::GetType(src_descriptor), - field_type, new_value, &wide_value)) { + if (!ConvertPrimitiveValue(nullptr, false, kPrimitiveType, field_type, new_value, &wide_value)) { DCHECK(soa.Self()->IsExceptionPending()); return; } // If field is not set to be accessible, verify it can be accessed by the caller. - if ((accessible == JNI_FALSE) && !VerifyFieldAccess(f, o, true)) { + if ((accessible == JNI_FALSE) && !VerifyFieldAccess<true>(soa.Self(), f, o)) { DCHECK(soa.Self()->IsExceptionPending()); return; } @@ -334,56 +356,56 @@ static void Field_setBoolean(JNIEnv* env, jobject javaField, jobject javaObj, jb jboolean accessible) { JValue value; value.SetZ(z); - SetPrimitiveField(env, javaField, javaObj, 'Z', value, accessible); + SetPrimitiveField<Primitive::kPrimBoolean>(env, javaField, javaObj, value, accessible); } static void Field_setByte(JNIEnv* env, jobject javaField, jobject javaObj, jbyte b, jboolean accessible) { JValue value; value.SetB(b); - SetPrimitiveField(env, javaField, javaObj, 'B', value, accessible); + SetPrimitiveField<Primitive::kPrimByte>(env, javaField, javaObj, value, accessible); } static void Field_setChar(JNIEnv* env, jobject javaField, jobject javaObj, jchar c, jboolean accessible) { JValue value; value.SetC(c); - SetPrimitiveField(env, javaField, javaObj, 'C', value, accessible); + SetPrimitiveField<Primitive::kPrimChar>(env, javaField, javaObj, value, accessible); } static void Field_setDouble(JNIEnv* env, jobject javaField, jobject javaObj, jdouble d, jboolean accessible) { JValue value; value.SetD(d); - SetPrimitiveField(env, javaField, javaObj, 'D', value, accessible); + SetPrimitiveField<Primitive::kPrimDouble>(env, javaField, javaObj, value, accessible); } static void Field_setFloat(JNIEnv* env, jobject javaField, jobject javaObj, jfloat f, jboolean accessible) { JValue value; value.SetF(f); - SetPrimitiveField(env, javaField, javaObj, 'F', value, accessible); + SetPrimitiveField<Primitive::kPrimFloat>(env, javaField, javaObj, value, accessible); } static void Field_setInt(JNIEnv* env, jobject javaField, jobject javaObj, jint i, jboolean accessible) { JValue value; value.SetI(i); - SetPrimitiveField(env, javaField, javaObj, 'I', value, accessible); + SetPrimitiveField<Primitive::kPrimInt>(env, javaField, javaObj, value, accessible); } static void Field_setLong(JNIEnv* env, jobject javaField, jobject javaObj, jlong j, jboolean accessible) { JValue value; value.SetJ(j); - SetPrimitiveField(env, javaField, javaObj, 'J', value, accessible); + SetPrimitiveField<Primitive::kPrimLong>(env, javaField, javaObj, value, accessible); } static void Field_setShort(JNIEnv* env, jobject javaField, jobject javaObj, 
jshort s, jboolean accessible) { JValue value; value.SetS(s); - SetPrimitiveField(env, javaField, javaObj, 'S', value, accessible); + SetPrimitiveField<Primitive::kPrimShort>(env, javaField, javaObj, value, accessible); } static JNINativeMethod gMethods[] = { diff --git a/runtime/native_bridge_art_interface.cc b/runtime/native_bridge_art_interface.cc index 453c92f495..bc191b4289 100644 --- a/runtime/native_bridge_art_interface.cc +++ b/runtime/native_bridge_art_interface.cc @@ -16,6 +16,9 @@ #include "native_bridge_art_interface.h" +#include "nativebridge/native_bridge.h" + +#include "base/logging.h" #include "mirror/art_method-inl.h" #include "mirror/class-inl.h" #include "scoped_thread_state_change.h" @@ -91,4 +94,42 @@ uint32_t GetNativeMethods(JNIEnv* env, jclass clazz, JNINativeMethod* methods, return count; } -}; // namespace art +// Native bridge library runtime callbacks. They represent the runtime interface to native bridge. +// +// The interface is expected to expose the following methods: +// getMethodShorty(): in the case of native method calling JNI native function CallXXXXMethodY(), +// native bridge calls back to VM for the shorty of the method so that it can prepare based on +// host calling convention. +// getNativeMethodCount() and getNativeMethods(): in case of JNI function UnregisterNatives(), +// native bridge can call back to get all native methods of specified class so that all +// corresponding trampolines can be destroyed. +static android::NativeBridgeRuntimeCallbacks native_bridge_art_callbacks_ { + GetMethodShorty, GetNativeMethodCount, GetNativeMethods +}; + +void LoadNativeBridge(std::string& native_bridge_library_filename) { + android::LoadNativeBridge(native_bridge_library_filename.c_str(), &native_bridge_art_callbacks_); + VLOG(startup) << "Runtime::Setup native bridge library: " + << (native_bridge_library_filename.empty() ? "(empty)" : native_bridge_library_filename); +} + +void PreInitializeNativeBridge(std::string dir) { + VLOG(startup) << "Runtime::Pre-initialize native bridge"; +#ifndef __APPLE__ // Mac OS does not support CLONE_NEWNS. + if (unshare(CLONE_NEWNS) == -1) { + LOG(WARNING) << "Could not create mount namespace."; + return; + } + android::PreInitializeNativeBridge(dir.c_str(), GetInstructionSetString(kRuntimeISA)); +#endif +} + +void InitializeNativeBridge(JNIEnv* env, const char* instruction_set) { + android::InitializeNativeBridge(env, instruction_set); +} + +void UnloadNativeBridge() { + android::UnloadNativeBridge(); +} + +} // namespace art diff --git a/runtime/native_bridge_art_interface.h b/runtime/native_bridge_art_interface.h index 08735c8955..026cd82c15 100644 --- a/runtime/native_bridge_art_interface.h +++ b/runtime/native_bridge_art_interface.h @@ -19,16 +19,22 @@ #include <jni.h> #include <stdint.h> +#include <string> namespace art { -const char* GetMethodShorty(JNIEnv* env, jmethodID mid); +// Mirror libnativebridge interface. Done to have the ART callbacks out of line, and not require +// the system/core header file in other files. -uint32_t GetNativeMethodCount(JNIEnv* env, jclass clazz); +void LoadNativeBridge(std::string& native_bridge_library_filename); -uint32_t GetNativeMethods(JNIEnv* env, jclass clazz, JNINativeMethod* methods, - uint32_t method_count); +// This is mostly for testing purposes, as in a full system this is called by Zygote code. 
+void PreInitializeNativeBridge(std::string dir); -}; // namespace art +void InitializeNativeBridge(JNIEnv* env, const char* instruction_set); + +void UnloadNativeBridge(); + +} // namespace art #endif // ART_RUNTIME_NATIVE_BRIDGE_ART_INTERFACE_H_ diff --git a/runtime/oat.cc b/runtime/oat.cc index 43173cad15..6810d73cbb 100644 --- a/runtime/oat.cc +++ b/runtime/oat.cc @@ -23,7 +23,7 @@ namespace art { const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' }; -const uint8_t OatHeader::kOatVersion[] = { '0', '4', '0', '\0' }; +const uint8_t OatHeader::kOatVersion[] = { '0', '4', '2', '\0' }; static size_t ComputeOatHeaderSize(const SafeMap<std::string, std::string>* variable_data) { size_t estimate = 0U; diff --git a/runtime/primitive.h b/runtime/primitive.h index 36ad662ff3..afcc64ded3 100644 --- a/runtime/primitive.h +++ b/runtime/primitive.h @@ -26,6 +26,24 @@ namespace art { static constexpr size_t kObjectReferenceSize = 4; + +template<size_t kComponentSize> +size_t ComponentSizeShiftWidth() { + switch (kComponentSize) { + case 1: + return 0U; + case 2: + return 1U; + case 4: + return 2U; + case 8: + return 3U; + default: + LOG(FATAL) << "Unexpected component size : " << kComponentSize; + return 0U; + } +} + class Primitive { public: enum Type { @@ -66,6 +84,24 @@ class Primitive { } } + static size_t ComponentSizeShift(Type type) { + switch (type) { + case kPrimVoid: + case kPrimBoolean: + case kPrimByte: return 0; + case kPrimChar: + case kPrimShort: return 1; + case kPrimInt: + case kPrimFloat: return 2; + case kPrimLong: + case kPrimDouble: return 3; + case kPrimNot: return ComponentSizeShiftWidth<kObjectReferenceSize>(); + default: + LOG(FATAL) << "Invalid type " << static_cast<int>(type); + return 0; + } + } + static size_t ComponentSize(Type type) { switch (type) { case kPrimVoid: return 0; diff --git a/runtime/reflection-inl.h b/runtime/reflection-inl.h new file mode 100644 index 0000000000..be4d5603f9 --- /dev/null +++ b/runtime/reflection-inl.h @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_REFLECTION_INL_H_ +#define ART_RUNTIME_REFLECTION_INL_H_ + +#include "reflection.h" + +#include "base/stringprintf.h" +#include "common_throws.h" +#include "jvalue.h" +#include "primitive.h" +#include "utils.h" + +namespace art { + +inline bool ConvertPrimitiveValue(const ThrowLocation* throw_location, bool unbox_for_result, + Primitive::Type srcType, Primitive::Type dstType, + const JValue& src, JValue* dst) { + DCHECK(srcType != Primitive::kPrimNot && dstType != Primitive::kPrimNot); + if (LIKELY(srcType == dstType)) { + dst->SetJ(src.GetJ()); + return true; + } + switch (dstType) { + case Primitive::kPrimBoolean: // Fall-through. + case Primitive::kPrimChar: // Fall-through. + case Primitive::kPrimByte: + // Only expect assignment with source and destination of identical type. 
+ break; + case Primitive::kPrimShort: + if (srcType == Primitive::kPrimByte) { + dst->SetS(src.GetI()); + return true; + } + break; + case Primitive::kPrimInt: + if (srcType == Primitive::kPrimByte || srcType == Primitive::kPrimChar || + srcType == Primitive::kPrimShort) { + dst->SetI(src.GetI()); + return true; + } + break; + case Primitive::kPrimLong: + if (srcType == Primitive::kPrimByte || srcType == Primitive::kPrimChar || + srcType == Primitive::kPrimShort || srcType == Primitive::kPrimInt) { + dst->SetJ(src.GetI()); + return true; + } + break; + case Primitive::kPrimFloat: + if (srcType == Primitive::kPrimByte || srcType == Primitive::kPrimChar || + srcType == Primitive::kPrimShort || srcType == Primitive::kPrimInt) { + dst->SetF(src.GetI()); + return true; + } else if (srcType == Primitive::kPrimLong) { + dst->SetF(src.GetJ()); + return true; + } + break; + case Primitive::kPrimDouble: + if (srcType == Primitive::kPrimByte || srcType == Primitive::kPrimChar || + srcType == Primitive::kPrimShort || srcType == Primitive::kPrimInt) { + dst->SetD(src.GetI()); + return true; + } else if (srcType == Primitive::kPrimLong) { + dst->SetD(src.GetJ()); + return true; + } else if (srcType == Primitive::kPrimFloat) { + dst->SetD(src.GetF()); + return true; + } + break; + default: + break; + } + if (!unbox_for_result) { + ThrowIllegalArgumentException(throw_location, + StringPrintf("Invalid primitive conversion from %s to %s", + PrettyDescriptor(srcType).c_str(), + PrettyDescriptor(dstType).c_str()).c_str()); + } else { + ThrowClassCastException(throw_location, + StringPrintf("Couldn't convert result of type %s to %s", + PrettyDescriptor(srcType).c_str(), + PrettyDescriptor(dstType).c_str()).c_str()); + } + return false; +} + +} // namespace art + +#endif // ART_RUNTIME_REFLECTION_INL_H_ diff --git a/runtime/reflection.cc b/runtime/reflection.cc index 9fe296a6b1..23f8076e0f 100644 --- a/runtime/reflection.cc +++ b/runtime/reflection.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "reflection.h" +#include "reflection-inl.h" #include "class_linker.h" #include "common_throws.h" @@ -592,9 +592,16 @@ jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject javaM } // If method is not set to be accessible, verify it can be accessed by the caller. - if (!accessible && !VerifyAccess(receiver, declaring_class, m->GetAccessFlags())) { - ThrowIllegalAccessException(nullptr, StringPrintf("Cannot access method: %s", - PrettyMethod(m).c_str()).c_str()); + mirror::Class* calling_class = nullptr; + if (!accessible && !VerifyAccess(soa.Self(), receiver, declaring_class, m->GetAccessFlags(), + &calling_class)) { + ThrowIllegalAccessException(nullptr, + StringPrintf("Class %s cannot access %s method %s of class %s", + calling_class == nullptr ? "null" : PrettyClass(calling_class).c_str(), + PrettyJavaAccessFlags(m->GetAccessFlags()).c_str(), + PrettyMethod(m).c_str(), + m->GetDeclaringClass() == nullptr ? 
"null" : + PrettyClass(m->GetDeclaringClass()).c_str()).c_str()); return nullptr; } @@ -644,80 +651,6 @@ bool VerifyObjectIsClass(mirror::Object* o, mirror::Class* c) { return true; } -bool ConvertPrimitiveValue(const ThrowLocation* throw_location, bool unbox_for_result, - Primitive::Type srcType, Primitive::Type dstType, - const JValue& src, JValue* dst) { - DCHECK(srcType != Primitive::kPrimNot && dstType != Primitive::kPrimNot); - if (LIKELY(srcType == dstType)) { - dst->SetJ(src.GetJ()); - return true; - } - switch (dstType) { - case Primitive::kPrimBoolean: // Fall-through. - case Primitive::kPrimChar: // Fall-through. - case Primitive::kPrimByte: - // Only expect assignment with source and destination of identical type. - break; - case Primitive::kPrimShort: - if (srcType == Primitive::kPrimByte) { - dst->SetS(src.GetI()); - return true; - } - break; - case Primitive::kPrimInt: - if (srcType == Primitive::kPrimByte || srcType == Primitive::kPrimChar || - srcType == Primitive::kPrimShort) { - dst->SetI(src.GetI()); - return true; - } - break; - case Primitive::kPrimLong: - if (srcType == Primitive::kPrimByte || srcType == Primitive::kPrimChar || - srcType == Primitive::kPrimShort || srcType == Primitive::kPrimInt) { - dst->SetJ(src.GetI()); - return true; - } - break; - case Primitive::kPrimFloat: - if (srcType == Primitive::kPrimByte || srcType == Primitive::kPrimChar || - srcType == Primitive::kPrimShort || srcType == Primitive::kPrimInt) { - dst->SetF(src.GetI()); - return true; - } else if (srcType == Primitive::kPrimLong) { - dst->SetF(src.GetJ()); - return true; - } - break; - case Primitive::kPrimDouble: - if (srcType == Primitive::kPrimByte || srcType == Primitive::kPrimChar || - srcType == Primitive::kPrimShort || srcType == Primitive::kPrimInt) { - dst->SetD(src.GetI()); - return true; - } else if (srcType == Primitive::kPrimLong) { - dst->SetD(src.GetJ()); - return true; - } else if (srcType == Primitive::kPrimFloat) { - dst->SetD(src.GetF()); - return true; - } - break; - default: - break; - } - if (!unbox_for_result) { - ThrowIllegalArgumentException(throw_location, - StringPrintf("Invalid primitive conversion from %s to %s", - PrettyDescriptor(srcType).c_str(), - PrettyDescriptor(dstType).c_str()).c_str()); - } else { - ThrowClassCastException(throw_location, - StringPrintf("Couldn't convert result of type %s to %s", - PrettyDescriptor(srcType).c_str(), - PrettyDescriptor(dstType).c_str()).c_str()); - } - return false; -} - mirror::Object* BoxPrimitive(Primitive::Type src_class, const JValue& value) { if (src_class == Primitive::kPrimNot) { return value.GetL(); @@ -889,18 +822,23 @@ bool UnboxPrimitiveForResult(const ThrowLocation& throw_location, mirror::Object return UnboxPrimitive(&throw_location, o, dst_class, nullptr, unboxed_value); } -bool VerifyAccess(mirror::Object* obj, mirror::Class* declaring_class, uint32_t access_flags) { - NthCallerVisitor visitor(Thread::Current(), 2); +bool VerifyAccess(Thread* self, mirror::Object* obj, mirror::Class* declaring_class, + uint32_t access_flags, mirror::Class** calling_class) { + if ((access_flags & kAccPublic) != 0) { + return true; + } + NthCallerVisitor visitor(self, 2); visitor.WalkStack(); if (UNLIKELY(visitor.caller == nullptr)) { // The caller is an attached native thread. 
- return (access_flags & kAccPublic) != 0; + return false; } mirror::Class* caller_class = visitor.caller->GetDeclaringClass(); - - if (((access_flags & kAccPublic) != 0) || (caller_class == declaring_class)) { + if (caller_class == declaring_class) { return true; } + ScopedAssertNoThreadSuspension sants(self, "verify-access"); + *calling_class = caller_class; if ((access_flags & kAccPrivate) != 0) { return false; } @@ -912,10 +850,7 @@ bool VerifyAccess(mirror::Object* obj, mirror::Class* declaring_class, uint32_t return true; } } - if (!declaring_class->IsInSamePackage(caller_class)) { - return false; - } - return true; + return declaring_class->IsInSamePackage(caller_class); } } // namespace art diff --git a/runtime/reflection.h b/runtime/reflection.h index 61370c650e..23d8e05452 100644 --- a/runtime/reflection.h +++ b/runtime/reflection.h @@ -43,9 +43,9 @@ bool UnboxPrimitiveForResult(const ThrowLocation& throw_location, mirror::Object mirror::Class* dst_class, JValue* unboxed_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); -bool ConvertPrimitiveValue(const ThrowLocation* throw_location, bool unbox_for_result, - Primitive::Type src_class, Primitive::Type dst_class, - const JValue& src, JValue* dst) +ALWAYS_INLINE bool ConvertPrimitiveValue(const ThrowLocation* throw_location, bool unbox_for_result, + Primitive::Type src_class, Primitive::Type dst_class, + const JValue& src, JValue* dst) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); JValue InvokeWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa, jobject obj, jmethodID mid, @@ -75,7 +75,8 @@ jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject metho bool VerifyObjectIsClass(mirror::Object* o, mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); -bool VerifyAccess(mirror::Object* obj, mirror::Class* declaring_class, uint32_t access_flags) +bool VerifyAccess(Thread* self, mirror::Object* obj, mirror::Class* declaring_class, + uint32_t access_flags, mirror::Class** calling_class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); } // namespace art diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 8386cc0613..49f8c63d75 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -144,8 +144,7 @@ Runtime::Runtime() target_sdk_version_(0), implicit_null_checks_(false), implicit_so_checks_(false), - implicit_suspend_checks_(false), - native_bridge_art_callbacks_({GetMethodShorty, GetNativeMethodCount, GetNativeMethods}) { + implicit_suspend_checks_(false) { } Runtime::~Runtime() { @@ -419,18 +418,23 @@ bool Runtime::Start() { Thread::FinishStartup(); + system_class_loader_ = CreateSystemClassLoader(); + if (is_zygote_) { if (!InitZygote()) { return false; } } else { - DidForkFromZygote(NativeBridgeAction::kInitialize); + bool have_native_bridge = !native_bridge_library_filename_.empty(); + if (have_native_bridge) { + PreInitializeNativeBridge("."); + } + DidForkFromZygote(self->GetJniEnv(), have_native_bridge ? 
NativeBridgeAction::kInitialize : + NativeBridgeAction::kUnload, GetInstructionSetString(kRuntimeISA)); } StartDaemonThreads(); - system_class_loader_ = CreateSystemClassLoader(); - { ScopedObjectAccess soa(self); self->GetJniEnv()->locals.AssertEmpty(); @@ -502,16 +506,16 @@ bool Runtime::InitZygote() { #endif } -void Runtime::DidForkFromZygote(NativeBridgeAction action) { +void Runtime::DidForkFromZygote(JNIEnv* env, NativeBridgeAction action, const char* isa) { is_zygote_ = false; switch (action) { case NativeBridgeAction::kUnload: - android::UnloadNativeBridge(); + UnloadNativeBridge(); break; case NativeBridgeAction::kInitialize: - android::InitializeNativeBridge(); + InitializeNativeBridge(env, isa); break; } @@ -878,10 +882,7 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) // DidForkFromZygote(kInitialize) -> try to initialize any native bridge given. // No-op wrt native bridge. native_bridge_library_filename_ = options->native_bridge_library_filename_; - android::LoadNativeBridge(native_bridge_library_filename_.c_str(), &native_bridge_art_callbacks_); - VLOG(startup) << "Runtime::Setup native bridge library: " - << (native_bridge_library_filename_.empty() ? - "(empty)" : native_bridge_library_filename_); + LoadNativeBridge(native_bridge_library_filename_); VLOG(startup) << "Runtime::Init exiting"; return true; @@ -1006,14 +1007,18 @@ void Runtime::DumpLockHolders(std::ostream& os) { } } -void Runtime::SetStatsEnabled(bool new_state, bool suspended) { +void Runtime::SetStatsEnabled(bool new_state) { + Thread* self = Thread::Current(); + MutexLock mu(self, *Locks::instrument_entrypoints_lock_); if (new_state == true) { GetStats()->Clear(~0); // TODO: wouldn't it make more sense to clear _all_ threads' stats? - Thread::Current()->GetStats()->Clear(~0); - GetInstrumentation()->InstrumentQuickAllocEntryPoints(suspended); - } else { - GetInstrumentation()->UninstrumentQuickAllocEntryPoints(suspended); + self->GetStats()->Clear(~0); + if (stats_enabled_ != new_state) { + GetInstrumentation()->InstrumentQuickAllocEntryPointsLocked(); + } + } else if (stats_enabled_ != new_state) { + GetInstrumentation()->UninstrumentQuickAllocEntryPointsLocked(); } stats_enabled_ = new_state; } diff --git a/runtime/runtime.h b/runtime/runtime.h index f9c017b278..35e3a8896c 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -32,7 +32,6 @@ #include "instrumentation.h" #include "instruction_set.h" #include "jobject_comparator.h" -#include "nativebridge/native_bridge.h" #include "object_callbacks.h" #include "offsets.h" #include "profiler_options.h" @@ -390,7 +389,8 @@ class Runtime { void ResetStats(int kinds); - void SetStatsEnabled(bool new_state, bool suspended); + void SetStatsEnabled(bool new_state) LOCKS_EXCLUDED(Locks::instrument_entrypoints_lock_, + Locks::mutator_lock_); enum class NativeBridgeAction { // private kUnload, @@ -398,7 +398,7 @@ class Runtime { }; void PreZygoteFork(); bool InitZygote(); - void DidForkFromZygote(NativeBridgeAction action); + void DidForkFromZygote(JNIEnv* env, NativeBridgeAction action, const char* isa); const instrumentation::Instrumentation* GetInstrumentation() const { return &instrumentation_; @@ -648,17 +648,6 @@ class Runtime { // the native bridge to load it and then gets the trampoline for the entry to native activity. std::string native_bridge_library_filename_; - // Native bridge library runtime callbacks. They represent the runtime interface to native bridge. 
- // - // The interface is expected to expose the following methods: - // getMethodShorty(): in the case of native method calling JNI native function CallXXXXMethodY(), - // native bridge calls back to VM for the shorty of the method so that it can prepare based on - // host calling convention. - // getNativeMethodCount() and getNativeMethods(): in case of JNI function UnregisterNatives(), - // native bridge can call back to get all native methods of specified class so that all - // corresponding trampolines can be destroyed. - android::NativeBridgeRuntimeCallbacks native_bridge_art_callbacks_; - DISALLOW_COPY_AND_ASSIGN(Runtime); }; diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h index 6698634dcf..170cec68d1 100644 --- a/runtime/thread-inl.h +++ b/runtime/thread-inl.h @@ -23,7 +23,6 @@ #include "base/casts.h" #include "base/mutex-inl.h" -#include "entrypoints/entrypoint_utils-inl.h" #include "gc/heap.h" #include "jni_env_ext.h" diff --git a/runtime/thread.cc b/runtime/thread.cc index ae89c90dc4..07657d1422 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -1220,7 +1220,7 @@ void Thread::HandleUncaughtExceptions(ScopedObjectAccess& soa) { // Call the handler. tlsPtr_.jni_env->CallVoidMethod(handler.get(), - WellKnownClasses::java_lang_Thread$UncaughtExceptionHandler_uncaughtException, + WellKnownClasses::java_lang_Thread__UncaughtExceptionHandler_uncaughtException, peer.get(), exception.get()); // If the handler threw, clear that exception too. diff --git a/runtime/thread.h b/runtime/thread.h index 164eb86f67..6c427b8d50 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -679,7 +679,7 @@ class Thread { // Number of references allocated in handle scopes & JNI shadow frames on this thread. size_t NumStackReferences() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return NumHandleReferences() + NumJniShadowFrameReferences(); - }; + } // Is the given obj in this thread's stack indirect reference table? bool HandleScopeContains(jobject obj) const; diff --git a/runtime/trace.cc b/runtime/trace.cc index b32e0429b1..027f62d880 100644 --- a/runtime/trace.cc +++ b/runtime/trace.cc @@ -361,6 +361,10 @@ void Trace::Start(const char* trace_filename, int trace_fd, int buffer_size, int } Runtime* runtime = Runtime::Current(); + + // Enable count of allocs if specified in the flags. + bool enable_stats = false; + runtime->GetThreadList()->SuspendAll(); // Create Trace object. @@ -369,13 +373,8 @@ void Trace::Start(const char* trace_filename, int trace_fd, int buffer_size, int if (the_trace_ != NULL) { LOG(ERROR) << "Trace already in progress, ignoring this request"; } else { + enable_stats = (flags && kTraceCountAllocs) != 0; the_trace_ = new Trace(trace_file.release(), buffer_size, flags, sampling_enabled); - - // Enable count of allocs if specified in the flags. - if ((flags && kTraceCountAllocs) != 0) { - runtime->SetStatsEnabled(true, true); - } - if (sampling_enabled) { CHECK_PTHREAD_CALL(pthread_create, (&sampling_pthread_, NULL, &RunSamplingThread, reinterpret_cast<void*>(interval_us)), @@ -391,9 +390,15 @@ void Trace::Start(const char* trace_filename, int trace_fd, int buffer_size, int } runtime->GetThreadList()->ResumeAll(); + + // Can't call this when holding the mutator lock. 
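The reordering in Trace::Start() follows from the new locking in SetStatsEnabled(): it now takes Locks::instrument_entrypoints_lock_ and may itself suspend all threads while swapping the allocation entrypoints, so it can no longer run inside the suspended region. The resulting sequence is roughly:

    runtime->GetThreadList()->SuspendAll();
    // ... create the Trace instance, remember whether kTraceCountAllocs was requested ...
    runtime->GetThreadList()->ResumeAll();
    if (enable_stats) {
      runtime->SetStatsEnabled(true);  // safe here: the mutator lock is no longer held
    }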
+ if (enable_stats) { + runtime->SetStatsEnabled(true); + } } void Trace::Stop() { + bool stop_alloc_counting = false; Runtime* runtime = Runtime::Current(); runtime->GetThreadList()->SuspendAll(); Trace* the_trace = NULL; @@ -409,6 +414,7 @@ void Trace::Stop() { } } if (the_trace != NULL) { + stop_alloc_counting = (the_trace->flags_ & kTraceCountAllocs) != 0; the_trace->FinishTracing(); if (the_trace->sampling_enabled_) { @@ -425,6 +431,11 @@ void Trace::Stop() { } runtime->GetThreadList()->ResumeAll(); + if (stop_alloc_counting) { + // Can be racy since SetStatsEnabled is not guarded by any locks. + Runtime::Current()->SetStatsEnabled(false); + } + if (sampling_pthread != 0U) { CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, NULL), "sampling thread shutdown"); sampling_pthread_ = 0U; @@ -489,10 +500,6 @@ void Trace::FinishTracing() { size_t final_offset = cur_offset_.LoadRelaxed(); - if ((flags_ & kTraceCountAllocs) != 0) { - Runtime::Current()->SetStatsEnabled(false, true); - } - std::set<mirror::ArtMethod*> visited_methods; GetVisitedMethods(final_offset, &visited_methods); @@ -553,7 +560,7 @@ void Trace::DexPcMoved(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method, uint32_t new_dex_pc) { // We're not recorded to listen to this kind of event, so complain. LOG(ERROR) << "Unexpected dex PC event in tracing " << PrettyMethod(method) << " " << new_dex_pc; -}; +} void Trace::FieldRead(Thread* /*thread*/, mirror::Object* this_object, mirror::ArtMethod* method, uint32_t dex_pc, mirror::ArtField* field) diff --git a/runtime/transaction_test.cc b/runtime/transaction_test.cc index 432a2fe0e5..a14889c852 100644 --- a/runtime/transaction_test.cc +++ b/runtime/transaction_test.cc @@ -89,7 +89,7 @@ TEST_F(TransactionTest, Array_length) { Handle<mirror::Array> h_obj( hs.NewHandle( mirror::Array::Alloc<true>(soa.Self(), h_klass.Get(), kArraySize, - h_klass->GetComponentSize(), + h_klass->GetComponentSizeShift(), Runtime::Current()->GetHeap()->GetCurrentAllocator()))); ASSERT_TRUE(h_obj.Get() != nullptr); ASSERT_EQ(h_obj->GetClass(), h_klass.Get()); diff --git a/runtime/utils.cc b/runtime/utils.cc index 9157f6c9bf..0496d97ae7 100644 --- a/runtime/utils.cc +++ b/runtime/utils.cc @@ -315,10 +315,6 @@ std::string PrettyDescriptor(const char* descriptor) { return result; } -std::string PrettyDescriptor(Primitive::Type type) { - return PrettyDescriptor(Primitive::Descriptor(type)); -} - std::string PrettyField(mirror::ArtField* f, bool with_type) { if (f == NULL) { return "null"; @@ -478,6 +474,35 @@ std::string PrettyClassAndClassLoader(mirror::Class* c) { return result; } +std::string PrettyJavaAccessFlags(uint32_t access_flags) { + std::string result; + if ((access_flags & kAccPublic) != 0) { + result += "public "; + } + if ((access_flags & kAccProtected) != 0) { + result += "protected "; + } + if ((access_flags & kAccPrivate) != 0) { + result += "private "; + } + if ((access_flags & kAccFinal) != 0) { + result += "final "; + } + if ((access_flags & kAccStatic) != 0) { + result += "static "; + } + if ((access_flags & kAccTransient) != 0) { + result += "transient "; + } + if ((access_flags & kAccVolatile) != 0) { + result += "volatile "; + } + if ((access_flags & kAccSynchronized) != 0) { + result += "synchronized "; + } + return result; +} + std::string PrettySize(int64_t byte_count) { // The byte thresholds at which we display amounts. A byte count is displayed // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1]. 
@@ -1421,4 +1446,8 @@ void PushWord(std::vector<uint8_t>* buf, int data) { buf->push_back((data >> 24) & 0xff); } +std::string PrettyDescriptor(Primitive::Type type) { + return PrettyDescriptor(Primitive::Descriptor(type)); +} + } // namespace art diff --git a/runtime/utils.h b/runtime/utils.h index 9ec6db1e52..3f2d829b68 100644 --- a/runtime/utils.h +++ b/runtime/utils.h @@ -311,6 +311,10 @@ std::string PrettyClass(mirror::Class* c) std::string PrettyClassAndClassLoader(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); +// Returns a human-readable version of the Java part of the access flags, e.g., "private static " +// (note the trailing whitespace). +std::string PrettyJavaAccessFlags(uint32_t access_flags); + // Returns a human-readable size string such as "1MB". std::string PrettySize(int64_t size_in_bytes); diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc index d6c90e1d45..1b2c3eec05 100644 --- a/runtime/utils_test.cc +++ b/runtime/utils_test.cc @@ -44,6 +44,18 @@ TEST_F(UtilsTest, PrettyDescriptor_ScalarReferences) { EXPECT_EQ("java.lang.String", PrettyDescriptor("Ljava/lang/String;")); } +TEST_F(UtilsTest, PrettyDescriptor_Primitive) { + EXPECT_EQ("boolean", PrettyDescriptor(Primitive::kPrimBoolean)); + EXPECT_EQ("byte", PrettyDescriptor(Primitive::kPrimByte)); + EXPECT_EQ("char", PrettyDescriptor(Primitive::kPrimChar)); + EXPECT_EQ("short", PrettyDescriptor(Primitive::kPrimShort)); + EXPECT_EQ("int", PrettyDescriptor(Primitive::kPrimInt)); + EXPECT_EQ("float", PrettyDescriptor(Primitive::kPrimFloat)); + EXPECT_EQ("long", PrettyDescriptor(Primitive::kPrimLong)); + EXPECT_EQ("double", PrettyDescriptor(Primitive::kPrimDouble)); + EXPECT_EQ("void", PrettyDescriptor(Primitive::kPrimVoid)); +} + TEST_F(UtilsTest, PrettyDescriptor_PrimitiveArrays) { EXPECT_EQ("boolean[]", PrettyDescriptor("[Z")); EXPECT_EQ("boolean[][]", PrettyDescriptor("[[Z")); diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index f28d4883b2..9747b4e46c 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -379,10 +379,31 @@ void MethodVerifier::FindLocksAtDexPc(mirror::ArtMethod* m, uint32_t dex_pc, verifier.FindLocksAtDexPc(); } +static bool HasMonitorEnterInstructions(const DexFile::CodeItem* const code_item) { + const Instruction* inst = Instruction::At(code_item->insns_); + + uint32_t insns_size = code_item->insns_size_in_code_units_; + for (uint32_t dex_pc = 0; dex_pc < insns_size;) { + if (inst->Opcode() == Instruction::MONITOR_ENTER) { + return true; + } + + dex_pc += inst->SizeInCodeUnits(); + inst = inst->Next(); + } + + return false; +} + void MethodVerifier::FindLocksAtDexPc() { CHECK(monitor_enter_dex_pcs_ != nullptr); CHECK(code_item_ != nullptr); // This only makes sense for methods with code. + // Quick check whether there are any monitor_enter instructions at all. + if (!HasMonitorEnterInstructions(code_item_)) { + return; + } + // Strictly speaking, we ought to be able to get away with doing a subset of the full method // verification. 
In practice, the phase we want relies on data structures set up by all the // earlier passes, so we just run the full method verification and bail out early when we've @@ -650,6 +671,11 @@ bool MethodVerifier::ScanTryCatchBlocks() { << "exception handler starts at bad address (" << dex_pc << ")"; return false; } + if (!CheckNotMoveResult(code_item_->insns_, dex_pc)) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) + << "exception handler begins with move-result* (" << dex_pc << ")"; + return false; + } insn_flags_[dex_pc].SetBranchTarget(); // Ensure exception types are resolved so that they don't need resolution to be delivered, // unresolved exception types will be ignored by exception delivery @@ -2766,7 +2792,7 @@ bool MethodVerifier::CodeFlowVerifyInstruction(uint32_t* start_guess) { return false; } DCHECK_EQ(isConditional, (opcode_flags & Instruction::kContinue) != 0); - if (!CheckNotMoveException(code_item_->insns_, work_insn_idx_ + branch_target)) { + if (!CheckNotMoveExceptionOrMoveResult(code_item_->insns_, work_insn_idx_ + branch_target)) { return false; } /* update branch target, set "changed" if appropriate */ @@ -2812,7 +2838,7 @@ bool MethodVerifier::CodeFlowVerifyInstruction(uint32_t* start_guess) { (((int32_t) switch_insns[offset_to_targets + targ * 2 + 1]) << 16); abs_offset = work_insn_idx_ + offset; DCHECK_LT(abs_offset, code_item_->insns_size_in_code_units_); - if (!CheckNotMoveException(code_item_->insns_, abs_offset)) { + if (!CheckNotMoveExceptionOrMoveResult(code_item_->insns_, abs_offset)) { return false; } if (!UpdateRegisters(abs_offset, work_line_.get(), false)) { @@ -3344,7 +3370,8 @@ mirror::ArtMethod* MethodVerifier::GetQuickInvokedMethod(const Instruction* inst CHECK(dispatch_class->HasVTable()) << PrettyDescriptor(dispatch_class); uint16_t vtable_index = is_range ? 
inst->VRegB_3rc() : inst->VRegB_35c(); CHECK_LT(static_cast<int32_t>(vtable_index), dispatch_class->GetVTableLength()) - << PrettyDescriptor(klass); + << PrettyDescriptor(klass) << " in method " + << PrettyMethod(dex_method_idx_, *dex_file_, true); mirror::ArtMethod* res_method = dispatch_class->GetVTableEntry(vtable_index); CHECK(!self_->IsExceptionPending()); return res_method; @@ -3849,7 +3876,11 @@ mirror::ArtField* MethodVerifier::GetQuickFieldAccess(const Instruction* inst, inst->Opcode() == Instruction::IGET_OBJECT_QUICK || inst->Opcode() == Instruction::IPUT_QUICK || inst->Opcode() == Instruction::IPUT_WIDE_QUICK || - inst->Opcode() == Instruction::IPUT_OBJECT_QUICK); + inst->Opcode() == Instruction::IPUT_OBJECT_QUICK || + inst->Opcode() == Instruction::IPUT_BOOLEAN_QUICK || + inst->Opcode() == Instruction::IPUT_BYTE_QUICK || + inst->Opcode() == Instruction::IPUT_CHAR_QUICK || + inst->Opcode() == Instruction::IPUT_SHORT_QUICK); const RegType& object_type = reg_line->GetRegisterType(this, inst->VRegB_22c()); if (!object_type.HasClass()) { VLOG(verifier) << "Failed to get mirror::Class* from '" << object_type << "'"; @@ -4001,6 +4032,19 @@ bool MethodVerifier::CheckNotMoveException(const uint16_t* insns, int insn_idx) return true; } +bool MethodVerifier::CheckNotMoveResult(const uint16_t* insns, int insn_idx) { + if (((insns[insn_idx] & 0xff) >= Instruction::MOVE_RESULT) && + ((insns[insn_idx] & 0xff) <= Instruction::MOVE_RESULT_OBJECT)) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid use of move-result*"; + return false; + } + return true; +} + +bool MethodVerifier::CheckNotMoveExceptionOrMoveResult(const uint16_t* insns, int insn_idx) { + return (CheckNotMoveException(insns, insn_idx) && CheckNotMoveResult(insns, insn_idx)); +} + bool MethodVerifier::UpdateRegisters(uint32_t next_insn, RegisterLine* merge_line, bool update_merge_line) { bool changed = true; diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h index 87acb20ac1..9f5efe851f 100644 --- a/runtime/verifier/method_verifier.h +++ b/runtime/verifier/method_verifier.h @@ -612,6 +612,21 @@ class MethodVerifier { bool CheckNotMoveException(const uint16_t* insns, int insn_idx); /* + * Verify that the target instruction is not "move-result". It is important that we cannot + * branch to move-result instructions, but we have to make this a distinct check instead of + * adding it to CheckNotMoveException, because it is legal to continue into "move-result" + * instructions - as long as the previous instruction was an invoke, which is checked elsewhere. + */ + bool CheckNotMoveResult(const uint16_t* insns, int insn_idx); + + /* + * Verify that the target instruction is not "move-result" or "move-exception". This is to + * be used when checking branch and switch instructions, but not instructions that can + * continue. + */ + bool CheckNotMoveExceptionOrMoveResult(const uint16_t* insns, int insn_idx); + + /* * Control can transfer to "next_insn". Merge the registers from merge_line into the table at * next_insn, and set the changed flag on the target address if any of the registers were changed. 
* In the case of fall-through, update the merge line on a change as its the working line for the diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h index d429dfd411..34d6caa6c7 100644 --- a/runtime/verifier/reg_type.h +++ b/runtime/verifier/reg_type.h @@ -340,6 +340,8 @@ class PrimitiveType : public RegType { public: PrimitiveType(mirror::Class* klass, const std::string& descriptor, uint16_t cache_id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + bool HasClassVirtual() const OVERRIDE { return true; } }; class Cat1Type : public PrimitiveType { diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc index bffec4b0e7..7c4094574f 100644 --- a/runtime/verifier/reg_type_cache.cc +++ b/runtime/verifier/reg_type_cache.cc @@ -96,7 +96,7 @@ const RegType& RegTypeCache::FromDescriptor(mirror::ClassLoader* loader, const c } else { return Conflict(); } -}; +} const RegType& RegTypeCache::RegTypeFromPrimitiveType(Primitive::Type prim_type) const { DCHECK(RegTypeCache::primitive_initialized_); @@ -288,6 +288,7 @@ const Type* RegTypeCache::CreatePrimitiveTypeInstance(const std::string& descrip if (!descriptor.empty()) { klass = art::Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(), descriptor.c_str()); + DCHECK(klass != nullptr); } const Type* entry = Type::CreateInstance(klass, descriptor, RegTypeCache::primitive_count_); RegTypeCache::primitive_count_++; diff --git a/runtime/verifier/reg_type_test.cc b/runtime/verifier/reg_type_test.cc index aad3b5ab99..2fecc8b25f 100644 --- a/runtime/verifier/reg_type_test.cc +++ b/runtime/verifier/reg_type_test.cc @@ -112,6 +112,7 @@ TEST_F(RegTypeTest, Primitives) { EXPECT_FALSE(bool_reg_type.IsDoubleTypes()); EXPECT_TRUE(bool_reg_type.IsArrayIndexTypes()); EXPECT_FALSE(bool_reg_type.IsNonZeroReferenceTypes()); + EXPECT_TRUE(bool_reg_type.HasClass()); const RegType& byte_reg_type = cache.Byte(); EXPECT_FALSE(byte_reg_type.IsUndefined()); @@ -144,6 +145,7 @@ TEST_F(RegTypeTest, Primitives) { EXPECT_FALSE(byte_reg_type.IsDoubleTypes()); EXPECT_TRUE(byte_reg_type.IsArrayIndexTypes()); EXPECT_FALSE(byte_reg_type.IsNonZeroReferenceTypes()); + EXPECT_TRUE(byte_reg_type.HasClass()); const RegType& char_reg_type = cache.Char(); EXPECT_FALSE(char_reg_type.IsUndefined()); @@ -176,6 +178,7 @@ TEST_F(RegTypeTest, Primitives) { EXPECT_FALSE(char_reg_type.IsDoubleTypes()); EXPECT_TRUE(char_reg_type.IsArrayIndexTypes()); EXPECT_FALSE(char_reg_type.IsNonZeroReferenceTypes()); + EXPECT_TRUE(char_reg_type.HasClass()); const RegType& short_reg_type = cache.Short(); EXPECT_FALSE(short_reg_type.IsUndefined()); @@ -208,6 +211,7 @@ TEST_F(RegTypeTest, Primitives) { EXPECT_FALSE(short_reg_type.IsDoubleTypes()); EXPECT_TRUE(short_reg_type.IsArrayIndexTypes()); EXPECT_FALSE(short_reg_type.IsNonZeroReferenceTypes()); + EXPECT_TRUE(short_reg_type.HasClass()); const RegType& int_reg_type = cache.Integer(); EXPECT_FALSE(int_reg_type.IsUndefined()); @@ -240,6 +244,7 @@ TEST_F(RegTypeTest, Primitives) { EXPECT_FALSE(int_reg_type.IsDoubleTypes()); EXPECT_TRUE(int_reg_type.IsArrayIndexTypes()); EXPECT_FALSE(int_reg_type.IsNonZeroReferenceTypes()); + EXPECT_TRUE(int_reg_type.HasClass()); const RegType& long_reg_type = cache.LongLo(); EXPECT_FALSE(long_reg_type.IsUndefined()); @@ -272,6 +277,7 @@ TEST_F(RegTypeTest, Primitives) { EXPECT_FALSE(long_reg_type.IsDoubleTypes()); EXPECT_FALSE(long_reg_type.IsArrayIndexTypes()); EXPECT_FALSE(long_reg_type.IsNonZeroReferenceTypes()); + EXPECT_TRUE(long_reg_type.HasClass()); const 
RegType& float_reg_type = cache.Float(); EXPECT_FALSE(float_reg_type.IsUndefined()); @@ -304,6 +310,7 @@ TEST_F(RegTypeTest, Primitives) { EXPECT_FALSE(float_reg_type.IsDoubleTypes()); EXPECT_FALSE(float_reg_type.IsArrayIndexTypes()); EXPECT_FALSE(float_reg_type.IsNonZeroReferenceTypes()); + EXPECT_TRUE(float_reg_type.HasClass()); const RegType& double_reg_type = cache.DoubleLo(); EXPECT_FALSE(double_reg_type.IsUndefined()); @@ -336,6 +343,7 @@ TEST_F(RegTypeTest, Primitives) { EXPECT_TRUE(double_reg_type.IsDoubleTypes()); EXPECT_FALSE(double_reg_type.IsArrayIndexTypes()); EXPECT_FALSE(double_reg_type.IsNonZeroReferenceTypes()); + EXPECT_TRUE(double_reg_type.HasClass()); } class RegTypeReferenceTest : public CommonRuntimeTest {}; diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc index cef604b630..4a3c3ec549 100644 --- a/runtime/well_known_classes.cc +++ b/runtime/well_known_classes.cc @@ -29,7 +29,7 @@ namespace art { jclass WellKnownClasses::com_android_dex_Dex; jclass WellKnownClasses::dalvik_system_DexFile; jclass WellKnownClasses::dalvik_system_DexPathList; -jclass WellKnownClasses::dalvik_system_DexPathList$Element; +jclass WellKnownClasses::dalvik_system_DexPathList__Element; jclass WellKnownClasses::dalvik_system_PathClassLoader; jclass WellKnownClasses::java_lang_BootClassLoader; jclass WellKnownClasses::java_lang_ClassLoader; @@ -48,7 +48,7 @@ jclass WellKnownClasses::java_lang_StackOverflowError; jclass WellKnownClasses::java_lang_String; jclass WellKnownClasses::java_lang_System; jclass WellKnownClasses::java_lang_Thread; -jclass WellKnownClasses::java_lang_Thread$UncaughtExceptionHandler; +jclass WellKnownClasses::java_lang_Thread__UncaughtExceptionHandler; jclass WellKnownClasses::java_lang_ThreadGroup; jclass WellKnownClasses::java_lang_Throwable; jclass WellKnownClasses::java_nio_DirectByteBuffer; @@ -78,7 +78,7 @@ jmethodID WellKnownClasses::java_lang_Short_valueOf; jmethodID WellKnownClasses::java_lang_System_runFinalization = NULL; jmethodID WellKnownClasses::java_lang_Thread_init; jmethodID WellKnownClasses::java_lang_Thread_run; -jmethodID WellKnownClasses::java_lang_Thread$UncaughtExceptionHandler_uncaughtException; +jmethodID WellKnownClasses::java_lang_Thread__UncaughtExceptionHandler_uncaughtException; jmethodID WellKnownClasses::java_lang_ThreadGroup_removeThread; jmethodID WellKnownClasses::java_nio_DirectByteBuffer_init; jmethodID WellKnownClasses::org_apache_harmony_dalvik_ddmc_DdmServer_broadcast; @@ -87,7 +87,7 @@ jmethodID WellKnownClasses::org_apache_harmony_dalvik_ddmc_DdmServer_dispatch; jfieldID WellKnownClasses::dalvik_system_DexFile_cookie; jfieldID WellKnownClasses::dalvik_system_PathClassLoader_pathList; jfieldID WellKnownClasses::dalvik_system_DexPathList_dexElements; -jfieldID WellKnownClasses::dalvik_system_DexPathList$Element_dexFile; +jfieldID WellKnownClasses::dalvik_system_DexPathList__Element_dexFile; jfieldID WellKnownClasses::java_lang_Thread_daemon; jfieldID WellKnownClasses::java_lang_Thread_group; jfieldID WellKnownClasses::java_lang_Thread_lock; @@ -163,7 +163,7 @@ void WellKnownClasses::Init(JNIEnv* env) { com_android_dex_Dex = CacheClass(env, "com/android/dex/Dex"); dalvik_system_DexFile = CacheClass(env, "dalvik/system/DexFile"); dalvik_system_DexPathList = CacheClass(env, "dalvik/system/DexPathList"); - dalvik_system_DexPathList$Element = CacheClass(env, "dalvik/system/DexPathList$Element"); + dalvik_system_DexPathList__Element = CacheClass(env, "dalvik/system/DexPathList$Element"); 
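Only the C++ identifiers drop the '$' (presumably to avoid a non-standard character in identifiers); the JNI descriptor strings keep the real inner-class names. A generic JNI sketch of the convention, using a class and signature that appear in this change:

    jclass handler_class = env->FindClass("java/lang/Thread$UncaughtExceptionHandler");
    jmethodID uncaught = env->GetMethodID(handler_class, "uncaughtException",
                                          "(Ljava/lang/Thread;Ljava/lang/Throwable;)V");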
dalvik_system_PathClassLoader = CacheClass(env, "dalvik/system/PathClassLoader"); java_lang_BootClassLoader = CacheClass(env, "java/lang/BootClassLoader"); java_lang_ClassLoader = CacheClass(env, "java/lang/ClassLoader"); @@ -182,7 +182,8 @@ void WellKnownClasses::Init(JNIEnv* env) { java_lang_String = CacheClass(env, "java/lang/String"); java_lang_System = CacheClass(env, "java/lang/System"); java_lang_Thread = CacheClass(env, "java/lang/Thread"); - java_lang_Thread$UncaughtExceptionHandler = CacheClass(env, "java/lang/Thread$UncaughtExceptionHandler"); + java_lang_Thread__UncaughtExceptionHandler = CacheClass(env, + "java/lang/Thread$UncaughtExceptionHandler"); java_lang_ThreadGroup = CacheClass(env, "java/lang/ThreadGroup"); java_lang_Throwable = CacheClass(env, "java/lang/Throwable"); java_nio_DirectByteBuffer = CacheClass(env, "java/nio/DirectByteBuffer"); @@ -207,7 +208,7 @@ void WellKnownClasses::Init(JNIEnv* env) { java_lang_reflect_Proxy_invoke = CacheMethod(env, java_lang_reflect_Proxy, true, "invoke", "(Ljava/lang/reflect/Proxy;Ljava/lang/reflect/ArtMethod;[Ljava/lang/Object;)Ljava/lang/Object;"); java_lang_Thread_init = CacheMethod(env, java_lang_Thread, false, "<init>", "(Ljava/lang/ThreadGroup;Ljava/lang/String;IZ)V"); java_lang_Thread_run = CacheMethod(env, java_lang_Thread, false, "run", "()V"); - java_lang_Thread$UncaughtExceptionHandler_uncaughtException = CacheMethod(env, java_lang_Thread$UncaughtExceptionHandler, false, "uncaughtException", "(Ljava/lang/Thread;Ljava/lang/Throwable;)V"); + java_lang_Thread__UncaughtExceptionHandler_uncaughtException = CacheMethod(env, java_lang_Thread__UncaughtExceptionHandler, false, "uncaughtException", "(Ljava/lang/Thread;Ljava/lang/Throwable;)V"); java_lang_ThreadGroup_removeThread = CacheMethod(env, java_lang_ThreadGroup, false, "removeThread", "(Ljava/lang/Thread;)V"); java_nio_DirectByteBuffer_init = CacheMethod(env, java_nio_DirectByteBuffer, false, "<init>", "(JI)V"); org_apache_harmony_dalvik_ddmc_DdmServer_broadcast = CacheMethod(env, org_apache_harmony_dalvik_ddmc_DdmServer, true, "broadcast", "(I)V"); @@ -216,7 +217,7 @@ void WellKnownClasses::Init(JNIEnv* env) { dalvik_system_DexFile_cookie = CacheField(env, dalvik_system_DexFile, false, "mCookie", "J"); dalvik_system_PathClassLoader_pathList = CacheField(env, dalvik_system_PathClassLoader, false, "pathList", "Ldalvik/system/DexPathList;"); dalvik_system_DexPathList_dexElements = CacheField(env, dalvik_system_DexPathList, false, "dexElements", "[Ldalvik/system/DexPathList$Element;"); - dalvik_system_DexPathList$Element_dexFile = CacheField(env, dalvik_system_DexPathList$Element, false, "dexFile", "Ldalvik/system/DexFile;"); + dalvik_system_DexPathList__Element_dexFile = CacheField(env, dalvik_system_DexPathList__Element, false, "dexFile", "Ldalvik/system/DexFile;"); java_lang_Thread_daemon = CacheField(env, java_lang_Thread, false, "daemon", "Z"); java_lang_Thread_group = CacheField(env, java_lang_Thread, false, "group", "Ljava/lang/ThreadGroup;"); java_lang_Thread_lock = CacheField(env, java_lang_Thread, false, "lock", "Ljava/lang/Object;"); diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h index 37807333b1..790d7f7ab6 100644 --- a/runtime/well_known_classes.h +++ b/runtime/well_known_classes.h @@ -42,7 +42,7 @@ struct WellKnownClasses { static jclass com_android_dex_Dex; static jclass dalvik_system_DexFile; static jclass dalvik_system_DexPathList; - static jclass dalvik_system_DexPathList$Element; + static jclass dalvik_system_DexPathList__Element; 
static jclass dalvik_system_PathClassLoader; static jclass java_lang_BootClassLoader; static jclass java_lang_ClassLoader; @@ -62,7 +62,7 @@ struct WellKnownClasses { static jclass java_lang_System; static jclass java_lang_Thread; static jclass java_lang_ThreadGroup; - static jclass java_lang_Thread$UncaughtExceptionHandler; + static jclass java_lang_Thread__UncaughtExceptionHandler; static jclass java_lang_Throwable; static jclass java_util_Collections; static jclass java_nio_DirectByteBuffer; @@ -91,7 +91,7 @@ struct WellKnownClasses { static jmethodID java_lang_System_runFinalization; static jmethodID java_lang_Thread_init; static jmethodID java_lang_Thread_run; - static jmethodID java_lang_Thread$UncaughtExceptionHandler_uncaughtException; + static jmethodID java_lang_Thread__UncaughtExceptionHandler_uncaughtException; static jmethodID java_lang_ThreadGroup_removeThread; static jmethodID java_nio_DirectByteBuffer_init; static jmethodID org_apache_harmony_dalvik_ddmc_DdmServer_broadcast; @@ -99,7 +99,7 @@ struct WellKnownClasses { static jfieldID dalvik_system_DexFile_cookie; static jfieldID dalvik_system_DexPathList_dexElements; - static jfieldID dalvik_system_DexPathList$Element_dexFile; + static jfieldID dalvik_system_DexPathList__Element_dexFile; static jfieldID dalvik_system_PathClassLoader_pathList; static jfieldID java_lang_reflect_AbstractMethod_artMethod; static jfieldID java_lang_reflect_Field_artField; diff --git a/test/115-native-bridge/expected.txt b/test/115-native-bridge/expected.txt index 808d968f67..a5eedc6a27 100644 --- a/test/115-native-bridge/expected.txt +++ b/test/115-native-bridge/expected.txt @@ -1,4 +1,5 @@ Native bridge initialized. +Checking for getEnvValues. Ready for native bridge tests. Checking for support. Getting trampoline for JNI_OnLoad with shorty (null). diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc index 3acc643d99..442f99c486 100644 --- a/test/115-native-bridge/nativebridge.cc +++ b/test/115-native-bridge/nativebridge.cc @@ -207,7 +207,8 @@ static NativeBridgeMethod* find_native_bridge_method(const char *name) { } // NativeBridgeCallbacks implementations -extern "C" bool native_bridge_initialize(const android::NativeBridgeRuntimeCallbacks* art_cbs) { +extern "C" bool native_bridge_initialize(const android::NativeBridgeRuntimeCallbacks* art_cbs, + const char* private_dir, const char* isa) { if (art_cbs != nullptr) { gNativeBridgeArtCallbacks = art_cbs; printf("Native bridge initialized.\n"); @@ -263,11 +264,49 @@ extern "C" bool native_bridge_isSupported(const char* libpath) { return strcmp(libpath, "libjavacore.so") != 0; } +namespace android { + +// Environment values required by the apps running with native bridge. 
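A sketch of how the values defined below would be consumed (an assumption; in a full system libnativebridge and the framework query them to populate os.arch and the supported-ABI list for a bridged app):

    const android::NativeBridgeRuntimeValues* env_values = native_bridge_getAppEnv("arm");
    if (env_values != nullptr) {
      printf("%s %s\n", env_values->os_arch, env_values->supported_abis[0]);  // "os.arch supported1"
    }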
+struct NativeBridgeRuntimeValues { + const char* os_arch; + const char* cpu_abi; + const char* cpu_abi2; + const char* *supported_abis; + int32_t abi_count; +}; + +} // namespace android + +const char* supported_abis[] = { + "supported1", "supported2", "supported3" +}; + +const struct android::NativeBridgeRuntimeValues nb_env { + .os_arch = "os.arch", + .cpu_abi = "cpu_abi", + .cpu_abi2 = "cpu_abi2", + .supported_abis = supported_abis, + .abi_count = 3 +}; + +extern "C" const struct android::NativeBridgeRuntimeValues* native_bridge_getAppEnv( + const char* abi) { + printf("Checking for getEnvValues.\n"); + + if (abi == nullptr) { + return nullptr; + } + + return &nb_env; +} + // "NativeBridgeItf" is effectively an API (it is the name of the symbol that will be loaded // by the native bridge library). android::NativeBridgeCallbacks NativeBridgeItf { + .version = 1, .initialize = &native_bridge_initialize, .loadLibrary = &native_bridge_loadLibrary, .getTrampoline = &native_bridge_getTrampoline, - .isSupported = &native_bridge_isSupported + .isSupported = &native_bridge_isSupported, + .getAppEnv = &native_bridge_getAppEnv }; diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index 773a9501f2..302db38883 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -157,6 +157,15 @@ endif TEST_ART_TIMING_SENSITIVE_RUN_TESTS := +TEST_ART_BROKEN_RUN_TESTS := \ + 004-ThreadStress + +ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(PREBUILD_TYPES), \ + $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ + $(IMAGE_TYPES), $(TEST_ART_BROKEN_RUN_TESTS), $(ALL_ADDRESS_SIZES)) + +TEST_ART_BROKEN_RUN_TESTS := + # Note 116-nodex2oat is not broken per-se it just doesn't (and isn't meant to) work with --prebuild. TEST_ART_BROKEN_PREBUILD_RUN_TESTS := \ 116-nodex2oat diff --git a/test/etc/host-run-test-jar b/test/etc/host-run-test-jar index d2b3fb1869..c020478d72 100755 --- a/test/etc/host-run-test-jar +++ b/test/etc/host-run-test-jar @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # Run the code in test.jar using the host-mode virtual machine. The jar should # contain a top-level class named Main to run. @@ -234,4 +234,4 @@ else if [ ${PIPESTATUS[0]} = 124 ] && [ "$TIME_OUT" = "y" ]; then echo -e "\e[91mTEST TIMED OUT!\e[0m" >&2 fi -fi
\ No newline at end of file +fi