Merge "Fix ExtractToMemMap to show original zip file name in ashmem"
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index fb6c625..5050d4e 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -377,7 +377,7 @@
     timings.EndSplit();
   }
 
-  void CompileDirectMethod(Handle<mirror::ClassLoader>& class_loader, const char* class_name,
+  void CompileDirectMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name,
                            const char* method_name, const char* signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string class_descriptor(DotToDescriptor(class_name));
@@ -390,7 +390,7 @@
     CompileMethod(method);
   }
 
-  void CompileVirtualMethod(Handle<mirror::ClassLoader>& class_loader, const char* class_name,
+  void CompileVirtualMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name,
                             const char* method_name, const char* signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string class_descriptor(DotToDescriptor(class_name));
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 1300071..24fea71 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -150,7 +150,7 @@
   if (insn == NULL) {
     LOG(FATAL) << "Break split failed";
   }
-  BasicBlock *bottom_block = NewMemBB(kDalvikByteCode, num_blocks_++);
+  BasicBlock* bottom_block = NewMemBB(kDalvikByteCode, num_blocks_++);
   block_list_.Insert(bottom_block);
 
   bottom_block->start_offset = code_offset;
@@ -188,9 +188,9 @@
     orig_block->successor_blocks = NULL;
     GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bottom_block->successor_blocks);
     while (true) {
-      SuccessorBlockInfo *successor_block_info = iterator.Next();
+      SuccessorBlockInfo* successor_block_info = iterator.Next();
       if (successor_block_info == NULL) break;
-      BasicBlock *bb = GetBasicBlock(successor_block_info->block);
+      BasicBlock* bb = GetBasicBlock(successor_block_info->block);
       bb->predecessors->Delete(orig_block->id);
       bb->predecessors->Insert(bottom_block->id);
     }
@@ -298,7 +298,7 @@
     }
   }
 
-  // Iterate over each of the handlers to enqueue the empty Catch blocks
+  // Iterate over each of the handlers to enqueue the empty Catch blocks.
   const byte* handlers_ptr = DexFile::GetCatchHandlerData(*current_code_item_, 0);
   uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
   for (uint32_t idx = 0; idx < handlers_size; idx++) {
@@ -345,7 +345,7 @@
       LOG(FATAL) << "Unexpected opcode(" << insn->dalvikInsn.opcode << ") with kBranch set";
   }
   CountBranch(target);
-  BasicBlock *taken_block = FindBlock(target, /* split */ true, /* create */ true,
+  BasicBlock* taken_block = FindBlock(target, /* split */ true, /* create */ true,
                                       /* immed_pred_block_p */ &cur_block);
   cur_block->taken = taken_block->id;
   taken_block->predecessors->Insert(cur_block->id);
@@ -405,7 +405,7 @@
     size = switch_data[1];
     first_key = switch_data[2] | (switch_data[3] << 16);
     target_table = reinterpret_cast<const int*>(&switch_data[4]);
-    keyTable = NULL;        // Make the compiler happy
+    keyTable = NULL;        // Make the compiler happy.
   /*
    * Sparse switch data format:
    *  ushort ident = 0x0200   magic value
@@ -421,7 +421,7 @@
     size = switch_data[1];
     keyTable = reinterpret_cast<const int*>(&switch_data[2]);
     target_table = reinterpret_cast<const int*>(&switch_data[2 + size*2]);
-    first_key = 0;   // To make the compiler happy
+    first_key = 0;   // To make the compiler happy.
   }
 
   if (cur_block->successor_block_list_type != kNotUsed) {
@@ -434,9 +434,9 @@
       new (arena_) GrowableArray<SuccessorBlockInfo*>(arena_, size, kGrowableArraySuccessorBlocks);
 
   for (i = 0; i < size; i++) {
-    BasicBlock *case_block = FindBlock(cur_offset + target_table[i], /* split */ true,
+    BasicBlock* case_block = FindBlock(cur_offset + target_table[i], /* split */ true,
                                       /* create */ true, /* immed_pred_block_p */ &cur_block);
-    SuccessorBlockInfo *successor_block_info =
+    SuccessorBlockInfo* successor_block_info =
         static_cast<SuccessorBlockInfo*>(arena_->Alloc(sizeof(SuccessorBlockInfo),
                                                        kArenaAllocSuccessor));
     successor_block_info->block = case_block->id;
@@ -479,13 +479,13 @@
         new (arena_) GrowableArray<SuccessorBlockInfo*>(arena_, 2, kGrowableArraySuccessorBlocks);
 
     for (; iterator.HasNext(); iterator.Next()) {
-      BasicBlock *catch_block = FindBlock(iterator.GetHandlerAddress(), false /* split*/,
+      BasicBlock* catch_block = FindBlock(iterator.GetHandlerAddress(), false /* split*/,
                                          false /* creat */, NULL  /* immed_pred_block_p */);
       catch_block->catch_entry = true;
       if (kIsDebugBuild) {
         catches_.insert(catch_block->start_offset);
       }
-      SuccessorBlockInfo *successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
+      SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
           (arena_->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor));
       successor_block_info->block = catch_block->id;
       successor_block_info->key = iterator.GetHandlerTypeIndex();
@@ -493,7 +493,7 @@
       catch_block->predecessors->Insert(cur_block->id);
     }
   } else if (build_all_edges) {
-    BasicBlock *eh_block = NewMemBB(kExceptionHandling, num_blocks_++);
+    BasicBlock* eh_block = NewMemBB(kExceptionHandling, num_blocks_++);
     cur_block->taken = eh_block->id;
     block_list_.Insert(eh_block);
     eh_block->start_offset = cur_offset;
@@ -503,7 +503,7 @@
   if (is_throw) {
     cur_block->explicit_throw = true;
     if (code_ptr < code_end) {
-      // Force creation of new block following THROW via side-effect
+      // Force creation of new block following THROW via side-effect.
       FindBlock(cur_offset + width, /* split */ false, /* create */ true,
                 /* immed_pred_block_p */ NULL);
     }
@@ -549,7 +549,7 @@
   *new_insn = *insn;
   insn->dalvikInsn.opcode =
       static_cast<Instruction::Code>(kMirOpCheck);
-  // Associate the two halves
+  // Associate the two halves.
   insn->meta.throw_insn = new_insn;
   new_block->AppendMIR(new_insn);
   return new_block;
@@ -616,7 +616,7 @@
   }
 
   /* Current block to record parsed instructions */
-  BasicBlock *cur_block = NewMemBB(kDalvikByteCode, num_blocks_++);
+  BasicBlock* cur_block = NewMemBB(kDalvikByteCode, num_blocks_++);
   DCHECK_EQ(current_offset_, 0U);
   cur_block->start_offset = current_offset_;
   block_list_.Insert(cur_block);
@@ -654,7 +654,7 @@
       cur_block->use_lvn = true;  // Run local value numbering on this basic block.
     }
 
-    // Check for inline data block signatures
+    // Check for inline data block signatures.
     if (opcode == Instruction::NOP) {
       // A simple NOP will have a width of 1 at this point, embedded data NOP > 1.
       if ((width == 1) && ((current_offset_ & 0x1) == 0x1) && ((code_end - code_ptr) > 1)) {
@@ -798,7 +798,7 @@
 
   for (idx = 0; idx < num_blocks; idx++) {
     int block_idx = all_blocks ? idx : dfs_order_->Get(idx);
-    BasicBlock *bb = GetBasicBlock(block_idx);
+    BasicBlock* bb = GetBasicBlock(block_idx);
     if (bb == NULL) continue;
     if (bb->block_type == kDead) continue;
     if (bb->block_type == kEntryBlock) {
@@ -808,7 +808,7 @@
     } else if (bb->block_type == kDalvikByteCode) {
       fprintf(file, "  block%04x_%d [shape=record,label = \"{ \\\n",
               bb->start_offset, bb->id);
-      const MIR *mir;
+      const MIR* mir;
         fprintf(file, "    {block id %d\\l}%s\\\n", bb->id,
                 bb->first_mir_insn ? " | " : " ");
         for (mir = bb->first_mir_insn; mir; mir = mir->next) {
@@ -870,13 +870,13 @@
               bb->start_offset, bb->id,
               (bb->successor_block_list_type == kCatch) ?  "Mrecord" : "record");
       GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bb->successor_blocks);
-      SuccessorBlockInfo *successor_block_info = iterator.Next();
+      SuccessorBlockInfo* successor_block_info = iterator.Next();
 
       int succ_id = 0;
       while (true) {
         if (successor_block_info == NULL) break;
 
-        BasicBlock *dest_block = GetBasicBlock(successor_block_info->block);
+        BasicBlock* dest_block = GetBasicBlock(successor_block_info->block);
         SuccessorBlockInfo *next_successor_block_info = iterator.Next();
 
         fprintf(file, "    {<f%d> %04x: %04x\\l}%s\\\n",
@@ -898,7 +898,7 @@
 
       succ_id = 0;
       while (true) {
-        SuccessorBlockInfo *successor_block_info = iter.Next();
+        SuccessorBlockInfo* successor_block_info = iter.Next();
         if (successor_block_info == NULL) break;
 
         BasicBlock* dest_block = GetBasicBlock(successor_block_info->block);
@@ -927,44 +927,78 @@
 
 /* Insert an MIR instruction to the end of a basic block. */
 void BasicBlock::AppendMIR(MIR* mir) {
-  if (first_mir_insn == nullptr) {
-    DCHECK(last_mir_insn == nullptr);
-    last_mir_insn = first_mir_insn = mir;
-    mir->next = nullptr;
-  } else {
-    last_mir_insn->next = mir;
-    mir->next = nullptr;
-    last_mir_insn = mir;
-  }
-
-  mir->bb = id;
+  // Insert it after the last MIR.
+  InsertMIRListAfter(last_mir_insn, mir, mir);
 }
 
-/* Insert an MIR instruction to the head of a basic block. */
-void BasicBlock::PrependMIR(MIR* mir) {
-  if (first_mir_insn == nullptr) {
-    DCHECK(last_mir_insn == nullptr);
-    last_mir_insn = first_mir_insn = mir;
-    mir->next = nullptr;
-  } else {
-    mir->next = first_mir_insn;
-    first_mir_insn = mir;
-  }
+void BasicBlock::AppendMIRList(MIR* first_list_mir, MIR* last_list_mir) {
+  // Insert it after the last MIR.
+  InsertMIRListAfter(last_mir_insn, first_list_mir, last_list_mir);
+}
 
-  mir->bb = id;
+void BasicBlock::AppendMIRList(const std::vector<MIR*>& insns) {
+  for (std::vector<MIR*>::const_iterator it = insns.begin(); it != insns.end(); it++) {
+    MIR* new_mir = *it;
+
+    // Add a copy of each MIR.
+    InsertMIRListAfter(last_mir_insn, new_mir, new_mir);
+  }
 }
 
 /* Insert a MIR instruction after the specified MIR. */
 void BasicBlock::InsertMIRAfter(MIR* current_mir, MIR* new_mir) {
-  new_mir->next = current_mir->next;
-  current_mir->next = new_mir;
+  InsertMIRListAfter(current_mir, new_mir, new_mir);
+}
 
-  if (last_mir_insn == current_mir) {
-    /* Is the last MIR in the block? */
-    last_mir_insn = new_mir;
+void BasicBlock::InsertMIRListAfter(MIR* insert_after, MIR* first_list_mir, MIR* last_list_mir) {
+  // If no MIR, we are done.
+  if (first_list_mir == nullptr || last_list_mir == nullptr) {
+    return;
   }
 
-  new_mir->bb = id;
+  // If insert_after is null, assume BB is empty.
+  if (insert_after == nullptr) {
+    first_mir_insn = first_list_mir;
+    last_mir_insn = last_list_mir;
+    last_list_mir->next = nullptr;
+  } else {
+    MIR* after_list = insert_after->next;
+    insert_after->next = first_list_mir;
+    last_list_mir->next = after_list;
+    if (after_list == nullptr) {
+      last_mir_insn = last_list_mir;
+    }
+  }
+
+  // Set this BB to be the basic block of the MIRs.
+  MIR* last = last_list_mir->next;
+  for (MIR* mir = first_list_mir; mir != last; mir = mir->next) {
+    mir->bb = id;
+  }
+}
+
+/* Insert an MIR instruction to the head of a basic block. */
+void BasicBlock::PrependMIR(MIR* mir) {
+  InsertMIRListBefore(first_mir_insn, mir, mir);
+}
+
+void BasicBlock::PrependMIRList(MIR* first_list_mir, MIR* last_list_mir) {
+  // Insert it before the first MIR.
+  InsertMIRListBefore(first_mir_insn, first_list_mir, last_list_mir);
+}
+
+void BasicBlock::PrependMIRList(const std::vector<MIR*>& to_add) {
+  for (std::vector<MIR*>::const_iterator it = to_add.begin(); it != to_add.end(); it++) {
+    MIR* mir = *it;
+
+    InsertMIRListBefore(first_mir_insn, mir, mir);
+  }
+}
+
+/* Insert a MIR instruction before the specified MIR. */
+void BasicBlock::InsertMIRBefore(MIR* current_mir, MIR* new_mir) {
+  // Insert as a single element list.
+  return InsertMIRListBefore(current_mir, new_mir, new_mir);
 }
 
 MIR* BasicBlock::FindPreviousMIR(MIR* mir) {
@@ -983,20 +1017,79 @@
   return nullptr;
 }
 
-void BasicBlock::InsertMIRBefore(MIR* current_mir, MIR* new_mir) {
-  if (first_mir_insn == current_mir) {
-    /* Is the first MIR in the block? */
-    first_mir_insn = new_mir;
-    new_mir->bb = id;
+void BasicBlock::InsertMIRListBefore(MIR* insert_before, MIR* first_list_mir, MIR* last_list_mir) {
+  // If no MIR, we are done.
+  if (first_list_mir == nullptr || last_list_mir == nullptr) {
+    return;
   }
 
-  MIR* prev = FindPreviousMIR(current_mir);
-
-  if (prev != nullptr) {
-    prev->next = new_mir;
-    new_mir->next = current_mir;
-    new_mir->bb = id;
+  // If insert_before is null, assume BB is empty.
+  if (insert_before == nullptr) {
+    first_mir_insn = first_list_mir;
+    last_mir_insn = last_list_mir;
+    last_list_mir->next = nullptr;
+  } else {
+    if (first_mir_insn == insert_before) {
+      last_list_mir->next = first_mir_insn;
+      first_mir_insn = first_list_mir;
+    } else {
+      // Find the preceding MIR.
+      MIR* before_list = FindPreviousMIR(insert_before);
+      DCHECK(before_list != nullptr);
+      before_list->next = first_list_mir;
+      last_list_mir->next = insert_before;
+    }
   }
+
+  // Set this BB to be the basic block of the MIRs.
+  for (MIR* mir = first_list_mir; mir != last_list_mir->next; mir = mir->next) {
+    mir->bb = id;
+  }
+}
+
+bool BasicBlock::RemoveMIR(MIR* mir) {
+  // Remove as a single element list.
+  return RemoveMIRList(mir, mir);
+}
+
+bool BasicBlock::RemoveMIRList(MIR* first_list_mir, MIR* last_list_mir) {
+  if (first_list_mir == nullptr) {
+    return false;
+  }
+
+  // Try to find the MIR.
+  MIR* before_list = nullptr;
+  MIR* after_list = nullptr;
+
+  // If we are removing from the beginning of the MIR list.
+  if (first_mir_insn == first_list_mir) {
+    before_list = nullptr;
+  } else {
+    before_list = FindPreviousMIR(first_list_mir);
+    if (before_list == nullptr) {
+      // We did not find the mir.
+      return false;
+    }
+  }
+
+  // Remove the BB information and also find the after_list
+  for (MIR* mir = first_list_mir; mir != last_list_mir; mir = mir->next) {
+    mir->bb = NullBasicBlockId;
+  }
+
+  after_list = last_list_mir->next;
+
+  // If there is nothing before the list, after_list is the first_mir
+  if (before_list == nullptr) {
+    first_mir_insn = after_list;
+  }
+
+  // If there is nothing after the list, before_list is last_mir
+  if (after_list == nullptr) {
+    last_mir_insn = before_list;
+  }
+
+  return true;
 }
 
 MIR* BasicBlock::GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current) {
@@ -1024,7 +1117,7 @@
   char* ret;
   bool nop = false;
   SSARepresentation* ssa_rep = mir->ssa_rep;
-  Instruction::Format dalvik_format = Instruction::k10x;  // Default to no-operand format
+  Instruction::Format dalvik_format = Instruction::k10x;  // Default to no-operand format.
   int defs = (ssa_rep != NULL) ? ssa_rep->num_defs : 0;
   int uses = (ssa_rep != NULL) ? ssa_rep->num_uses : 0;
 
@@ -1032,7 +1125,7 @@
   if ((opcode == kMirOpCheck) || (opcode == kMirOpCheckPart2)) {
     str.append(extended_mir_op_names_[opcode - kMirOpFirst]);
     str.append(": ");
-    // Recover the original Dex instruction
+    // Recover the original Dex instruction.
     insn = mir->meta.throw_insn->dalvikInsn;
     ssa_rep = mir->meta.throw_insn->ssa_rep;
     defs = ssa_rep->num_defs;
@@ -1091,7 +1184,7 @@
     str.append(StringPrintf(" 0x%x (%c%x)", mir->offset + offset,
                             offset > 0 ? '+' : '-', offset > 0 ? offset : -offset));
   } else {
-    // For invokes-style formats, treat wide regs as a pair of singles
+    // For invokes-style formats, treat wide regs as a pair of singles.
     bool show_singles = ((dalvik_format == Instruction::k35c) ||
                          (dalvik_format == Instruction::k3rc));
     if (defs != 0) {
@@ -1112,28 +1205,28 @@
       }
     }
     switch (dalvik_format) {
-      case Instruction::k11n:  // Add one immediate from vB
+      case Instruction::k11n:  // Add one immediate from vB.
       case Instruction::k21s:
       case Instruction::k31i:
       case Instruction::k21h:
         str.append(StringPrintf(", #%d", insn.vB));
         break;
-      case Instruction::k51l:  // Add one wide immediate
+      case Instruction::k51l:  // Add one wide immediate.
         str.append(StringPrintf(", #%" PRId64, insn.vB_wide));
         break;
-      case Instruction::k21c:  // One register, one string/type/method index
+      case Instruction::k21c:  // One register, one string/type/method index.
       case Instruction::k31c:
         str.append(StringPrintf(", index #%d", insn.vB));
         break;
-      case Instruction::k22c:  // Two registers, one string/type/method index
+      case Instruction::k22c:  // Two registers, one string/type/method index.
         str.append(StringPrintf(", index #%d", insn.vC));
         break;
-      case Instruction::k22s:  // Add one immediate from vC
+      case Instruction::k22s:  // Add one immediate from vC.
       case Instruction::k22b:
         str.append(StringPrintf(", #%d", insn.vC));
         break;
       default: {
-        // Nothing left to print
+        // Nothing left to print.
       }
     }
   }
@@ -1167,7 +1260,7 @@
 // Similar to GetSSAName, but if ssa name represents an immediate show that as well.
 std::string MIRGraph::GetSSANameWithConst(int ssa_reg, bool singles_only) {
   if (reg_location_ == NULL) {
-    // Pre-SSA - just use the standard name
+    // Pre-SSA - just use the standard name.
     return GetSSAName(ssa_reg);
   }
   if (IsConst(reg_location_[ssa_reg])) {
@@ -1287,8 +1380,8 @@
 
 // Allocate a new basic block.
 BasicBlock* MIRGraph::NewMemBB(BBType block_type, int block_id) {
-  BasicBlock* bb = static_cast<BasicBlock*>(arena_->Alloc(sizeof(BasicBlock),
-                                                          kArenaAllocBB));
+  BasicBlock* bb = new (arena_) BasicBlock();
+
   bb->block_type = block_type;
   bb->id = block_id;
   // TUNING: better estimate of the exit block predecessors?
@@ -1306,11 +1399,11 @@
 }
 
 void MIRGraph::InitializeMethodUses() {
-  // The gate starts by initializing the use counts
+  // The gate starts by initializing the use counts.
   int num_ssa_regs = GetNumSSARegs();
   use_counts_.Resize(num_ssa_regs + 32);
   raw_use_counts_.Resize(num_ssa_regs + 32);
-  // Initialize list
+  // Initialize list.
   for (int i = 0; i < num_ssa_regs; i++) {
     use_counts_.Insert(0);
     raw_use_counts_.Insert(0);
@@ -1505,46 +1598,52 @@
   return nullptr;
 }
 
-bool BasicBlock::RemoveMIR(MIR* mir) {
-  if (mir == nullptr) {
-    return false;
-  }
+BasicBlock* BasicBlock::Copy(CompilationUnit* c_unit) {
+  MIRGraph* mir_graph = c_unit->mir_graph.get();
+  return Copy(mir_graph);
+}
 
-  // Find the MIR, and the one before it if they exist.
-  MIR* current = nullptr;
-  MIR* prev = nullptr;
+BasicBlock* BasicBlock::Copy(MIRGraph* mir_graph) {
+  BasicBlock* result_bb = mir_graph->CreateNewBB(block_type);
 
-  // Find the mir we are looking for.
-  for (current = first_mir_insn; current != nullptr; prev = current, current = current->next) {
-    if (current == mir) {
-      break;
+  // We don't do a memcpy style copy here because it would lead to a lot of things
+  // to clean up. Let us do it by hand instead.
+  // Copy in taken and fallthrough.
+  result_bb->fall_through = fall_through;
+  result_bb->taken = taken;
+
+  // Copy successor links if needed.
+  ArenaAllocator* arena = mir_graph->GetArena();
+
+  result_bb->successor_block_list_type = successor_block_list_type;
+  if (result_bb->successor_block_list_type != kNotUsed) {
+    size_t size = successor_blocks->Size();
+    result_bb->successor_blocks = new (arena) GrowableArray<SuccessorBlockInfo*>(arena, size, kGrowableArraySuccessorBlocks);
+    GrowableArray<SuccessorBlockInfo*>::Iterator iterator(successor_blocks);
+    while (true) {
+      SuccessorBlockInfo* sbi_old = iterator.Next();
+      if (sbi_old == nullptr) {
+        break;
+      }
+      SuccessorBlockInfo* sbi_new = static_cast<SuccessorBlockInfo*>(arena->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor));
+      memcpy(sbi_new, sbi_old, sizeof(SuccessorBlockInfo));
+      result_bb->successor_blocks->Insert(sbi_new);
     }
   }
 
-  // Did we find it?
-  if (current != nullptr) {
-    MIR* next = current->next;
+  // Copy offset, method.
+  result_bb->start_offset = start_offset;
 
-    // Just update the links of prev and next and current is almost gone.
-    if (prev != nullptr) {
-      prev->next = next;
-    }
+  // Now copy instructions.
+  for (MIR* mir = first_mir_insn; mir != 0; mir = mir->next) {
+    // Get a copy first.
+    MIR* copy = mir->Copy(mir_graph);
 
-    // Exceptions are if first or last mirs are invoke.
-    if (first_mir_insn == current) {
-      first_mir_insn = next;
-    }
-
-    if (last_mir_insn == current) {
-      last_mir_insn = prev;
-    }
-
-    // Found it and removed it.
-    return true;
+    // Append it.
+    result_bb->AppendMIR(copy);
   }
 
-  // We did not find it.
-  return false;
+  return result_bb;
 }
 
 MIR* MIR::Copy(MIRGraph* mir_graph) {
@@ -1660,4 +1759,155 @@
 
   return sets_const;
 }
+
+void BasicBlock::ResetOptimizationFlags(uint16_t reset_flags) {
+  // Reset flags for all MIRs in bb.
+  for (MIR* mir = first_mir_insn; mir != NULL; mir = mir->next) {
+    mir->optimization_flags &= (~reset_flags);
+  }
+}
+
+void BasicBlock::Hide(CompilationUnit* c_unit) {
+  // First lets make it a dalvik bytecode block so it doesn't have any special meaning.
+  block_type = kDalvikByteCode;
+
+  // Mark it as hidden.
+  hidden = true;
+
+  // Detach it from its MIRs so we don't generate code for them. Also detached MIRs
+  // are updated to know that they no longer have a parent.
+  for (MIR* mir = first_mir_insn; mir != nullptr; mir = mir->next) {
+    mir->bb = NullBasicBlockId;
+  }
+  first_mir_insn = nullptr;
+  last_mir_insn = nullptr;
+
+  GrowableArray<BasicBlockId>::Iterator iterator(predecessors);
+
+  MIRGraph* mir_graph = c_unit->mir_graph.get();
+  while (true) {
+    BasicBlock* pred_bb = mir_graph->GetBasicBlock(iterator.Next());
+    if (pred_bb == nullptr) {
+      break;
+    }
+
+    // Sadly we have to go through the children by hand here.
+    pred_bb->ReplaceChild(id, NullBasicBlockId);
+  }
+
+  // Iterate through children of bb we are hiding.
+  ChildBlockIterator successorChildIter(this, mir_graph);
+
+  for (BasicBlock* childPtr = successorChildIter.Next(); childPtr != 0; childPtr = successorChildIter.Next()) {
+    // Replace child with null child.
+    childPtr->predecessors->Delete(id);
+  }
+}
+
+bool BasicBlock::IsSSALiveOut(const CompilationUnit* c_unit, int ssa_reg) {
+  // In order to determine if the ssa reg is live out, we scan all the MIRs. We remember
+  // the last SSA number of the same dalvik register. At the end, if it is different than ssa_reg,
+  // then it is not live out of this BB.
+  int dalvik_reg = c_unit->mir_graph->SRegToVReg(ssa_reg);
+
+  int last_ssa_reg = -1;
+
+  // Walk through the MIRs backwards.
+  for (MIR* mir = first_mir_insn; mir != nullptr; mir = mir->next) {
+    // Get ssa rep.
+    SSARepresentation *ssa_rep = mir->ssa_rep;
+
+    // Go through the defines for this MIR.
+    for (int i = 0; i < ssa_rep->num_defs; i++) {
+      DCHECK(ssa_rep->defs != nullptr);
+
+      // Get the ssa reg.
+      int def_ssa_reg = ssa_rep->defs[i];
+
+      // Get dalvik reg.
+      int def_dalvik_reg = c_unit->mir_graph->SRegToVReg(def_ssa_reg);
+
+      // Compare dalvik regs.
+      if (dalvik_reg == def_dalvik_reg) {
+        // We found a def of the register that we are being asked about.
+        // Remember it.
+        last_ssa_reg = def_ssa_reg;
+      }
+    }
+  }
+
+  if (last_ssa_reg == -1) {
+    // If we get to this point we couldn't find a define of register user asked about.
+    // Let's assume the user knows what he's doing so we can be safe and say that if we
+    // couldn't find a def, it is live out.
+    return true;
+  }
+
+  // If it is not -1, we found a match, is it ssa_reg?
+  return (ssa_reg == last_ssa_reg);
+}
+
+bool BasicBlock::ReplaceChild(BasicBlockId old_bb, BasicBlockId new_bb) {
+  // We need to check taken, fall_through, and successor_blocks to replace.
+  bool found = false;
+  if (taken == old_bb) {
+    taken = new_bb;
+    found = true;
+  }
+
+  if (fall_through == old_bb) {
+    fall_through = new_bb;
+    found = true;
+  }
+
+  if (successor_block_list_type != kNotUsed) {
+    GrowableArray<SuccessorBlockInfo*>::Iterator iterator(successor_blocks);
+    while (true) {
+      SuccessorBlockInfo* successor_block_info = iterator.Next();
+      if (successor_block_info == nullptr) {
+        break;
+      }
+      if (successor_block_info->block == old_bb) {
+        successor_block_info->block = new_bb;
+        found = true;
+      }
+    }
+  }
+
+  return found;
+}
+
+void BasicBlock::UpdatePredecessor(BasicBlockId old_parent, BasicBlockId new_parent) {
+  GrowableArray<BasicBlockId>::Iterator iterator(predecessors);
+  bool found = false;
+
+  while (true) {
+    BasicBlockId pred_bb_id = iterator.Next();
+
+    if (pred_bb_id == NullBasicBlockId) {
+      break;
+    }
+
+    if (pred_bb_id == old_parent) {
+      size_t idx = iterator.GetIndex() - 1;
+      predecessors->Put(idx, new_parent);
+      found = true;
+      break;
+    }
+  }
+
+  // If not found, add it.
+  if (found == false) {
+    predecessors->Insert(new_parent);
+  }
+}
+
+// Create a new basic block with block_id as num_blocks_ that is
+// post-incremented.
+BasicBlock* MIRGraph::CreateNewBB(BBType block_type) {
+  BasicBlock* res = NewMemBB(block_type, num_blocks_++);
+  block_list_.Insert(res);
+  return res;
+}
+
 }  // namespace art
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index a849bc1..53a997e 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -389,10 +389,49 @@
   GrowableArray<SuccessorBlockInfo*>* successor_blocks;
 
   void AppendMIR(MIR* mir);
+  void AppendMIRList(MIR* first_list_mir, MIR* last_list_mir);
+  void AppendMIRList(const std::vector<MIR*>& insns);
   void PrependMIR(MIR* mir);
+  void PrependMIRList(MIR* first_list_mir, MIR* last_list_mir);
+  void PrependMIRList(const std::vector<MIR*>& to_add);
   void InsertMIRAfter(MIR* current_mir, MIR* new_mir);
-  void InsertMIRBefore(MIR* current_mir, MIR* new_mir);
+  void InsertMIRListAfter(MIR* insert_after, MIR* first_list_mir, MIR* last_list_mir);
   MIR* FindPreviousMIR(MIR* mir);
+  void InsertMIRBefore(MIR* insert_before, MIR* list);
+  void InsertMIRListBefore(MIR* insert_before, MIR* first_list_mir, MIR* last_list_mir);
+  bool RemoveMIR(MIR* mir);
+  bool RemoveMIRList(MIR* first_list_mir, MIR* last_list_mir);
+
+  BasicBlock* Copy(CompilationUnit* c_unit);
+  BasicBlock* Copy(MIRGraph* mir_graph);
+
+  /**
+   * @brief Reset the optimization_flags field of each MIR.
+   */
+  void ResetOptimizationFlags(uint16_t reset_flags);
+
+  /**
+   * @brief Hide the BasicBlock.
+   * @details Set it to kDalvikByteCode, set hidden to true, remove all MIRs,
+   *          remove itself from any predecessor edges, remove itself from any
+   *          child's predecessor growable array.
+   */
+  void Hide(CompilationUnit* c_unit);
+
+  /**
+   * @brief Is ssa_reg the last SSA definition of that VR in the block?
+   */
+  bool IsSSALiveOut(const CompilationUnit* c_unit, int ssa_reg);
+
+  /**
+   * @brief Replace the edge going to old_bb to now go towards new_bb.
+   */
+  bool ReplaceChild(BasicBlockId old_bb, BasicBlockId new_bb);
+
+  /**
+   * @brief Update the predecessor growable array from old_pred to new_pred.
+   */
+  void UpdatePredecessor(BasicBlockId old_pred, BasicBlockId new_pred);
 
   /**
    * @brief Used to obtain the next MIR that follows unconditionally.
@@ -403,8 +442,12 @@
    * @return Returns the following MIR if one can be found.
    */
   MIR* GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current);
-  bool RemoveMIR(MIR* mir);
   bool IsExceptionBlock() const;
+
+  static void* operator new(size_t size, ArenaAllocator* arena) {
+    return arena->Alloc(sizeof(BasicBlock), kArenaAllocBB);
+  }
+  static void operator delete(void* p) {}  // Nop.
 };
 
 /*
@@ -921,6 +964,7 @@
   BasicBlock* NextDominatedBlock(BasicBlock* bb);
   bool LayoutBlocks(BasicBlock* bb);
   void ComputeTopologicalSortOrder();
+  BasicBlock* CreateNewBB(BBType block_type);
 
   bool InlineCallsGate();
   void InlineCallsStart();
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 4cebb7c..f58f078 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -670,7 +670,7 @@
     /* Mark a temp register as dead.  Does not affect allocation state. */
     void Clobber(RegStorage reg);
     void ClobberSReg(int s_reg);
-    void ClobberAliases(RegisterInfo* info);
+    void ClobberAliases(RegisterInfo* info, uint32_t clobber_mask);
     int SRegToPMap(int s_reg);
     void RecordCorePromotion(RegStorage reg, int s_reg);
     RegStorage AllocPreservedCoreReg(int s_reg);
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 8c0f2bb..e5ca460 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -173,22 +173,26 @@
       }
       ClobberBody(info);
       if (info->IsAliased()) {
-        ClobberAliases(info);
+        ClobberAliases(info, info->StorageMask());
       } else {
         RegisterInfo* master = info->Master();
         if (info != master) {
           ClobberBody(info->Master());
+          ClobberAliases(info->Master(), info->StorageMask());
         }
       }
     }
   }
 }
 
-void Mir2Lir::ClobberAliases(RegisterInfo* info) {
+void Mir2Lir::ClobberAliases(RegisterInfo* info, uint32_t clobber_mask) {
   for (RegisterInfo* alias = info->GetAliasChain(); alias != nullptr;
        alias = alias->GetAliasChain()) {
     DCHECK(!alias->IsAliased());  // Only the master should be marked as alised.
-    ClobberBody(alias);
+    // Only clobber if we have overlap.
+    if ((alias->StorageMask() & clobber_mask) != 0) {
+      ClobberBody(alias);
+    }
   }
 }
 
@@ -218,7 +222,7 @@
         }
         ClobberBody(info);
         if (info->IsAliased()) {
-          ClobberAliases(info);
+          ClobberAliases(info, info->StorageMask());
         }
       }
     }
@@ -953,11 +957,8 @@
         // If I'm live, master should not be live, but should show liveness in alias set.
         DCHECK_EQ(info->Master()->SReg(), INVALID_SREG);
         DCHECK(!info->Master()->IsDead());
-      } else if (!info->IsDead()) {
-        // If I'm not live, but there is liveness in the set master must be live.
-        DCHECK_EQ(info->SReg(), INVALID_SREG);
-        DCHECK(info->Master()->IsLive());
       }
+// TODO: Add checks in !info->IsDead() case to ensure every live bit is owned by exactly 1 reg.
     }
     if (info->IsAliased()) {
       // Has child aliases.
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 9200106..91a66d3 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -279,6 +279,11 @@
 { kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
 { kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
 
+#define EXT_0F_ENCODING2_MAP(opname, prefix, opcode, opcode2, reg_def) \
+{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+
   EXT_0F_ENCODING_MAP(Movsd, 0xF2, 0x10, REG_DEF0),
   { kX86MovsdMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdMR", "[!0r+!1d],!2r" },
   { kX86MovsdAR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdAR", "[!0r+!1r<<!2d+!3d],!4r" },
@@ -310,10 +315,42 @@
   EXT_0F_ENCODING_MAP(Divsd,     0xF2, 0x5E, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Divss,     0xF3, 0x5E, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Punpckldq, 0x66, 0x62, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Sqrtsd,    0xF2, 0x51, REG_DEF0_USE0),
+  EXT_0F_ENCODING2_MAP(Pmulld,   0x66, 0x38, 0x40, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Pmullw,    0x66, 0xD5, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Mulps,     0x00, 0x59, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Mulpd,     0x66, 0x59, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Paddb,     0x66, 0xFC, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Paddw,     0x66, 0xFD, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Paddd,     0x66, 0xFE, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Addps,     0x00, 0x58, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Addpd,     0xF2, 0x58, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Psubb,     0x66, 0xF8, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Psubw,     0x66, 0xF9, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Psubd,     0x66, 0xFA, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Subps,     0x00, 0x5C, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Subpd,     0x66, 0x5C, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Pand,      0x66, 0xDB, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Por,       0x66, 0xEB, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Pxor,      0x66, 0xEF, REG_DEF0_USE0),
+  EXT_0F_ENCODING2_MAP(Phaddw,   0x66, 0x38, 0x01, REG_DEF0_USE0),
+  EXT_0F_ENCODING2_MAP(Phaddd,   0x66, 0x38, 0x02, REG_DEF0_USE0),
 
+  { kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1 }, "PextbRRI", "!0r,!1r,!2d" },
+  { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1 }, "PextwRRI", "!0r,!1r,!2d" },
+  { kX86PextrdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1 }, "PextdRRI", "!0r,!1r,!2d" },
+
+  { kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1 }, "PshuflwRRI", "!0r,!1r,!2d" },
+  { kX86PshufdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1 }, "PshuffRRI", "!0r,!1r,!2d" },
+
+  { kX86PsrawRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 4, 0, 1 }, "PsrawRI", "!0r,!1d" },
+  { kX86PsradRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 4, 0, 1 }, "PsradRI", "!0r,!1d" },
+  { kX86PsrlwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 2, 0, 1 }, "PsrlwRI", "!0r,!1d" },
+  { kX86PsrldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 2, 0, 1 }, "PsrldRI", "!0r,!1d" },
   { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" },
+  { kX86PsllwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 6, 0, 1 }, "PsllwRI", "!0r,!1d" },
+  { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1 }, "PslldRI", "!0r,!1d" },
   { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" },
-  { kX86SqrtsdRR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0xF2, 0, 0x0F, 0x51, 0, 0, 0, 0 }, "SqrtsdRR", "!0r,!1r" },
 
   { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0 }, "Fild32M", "[!0r,!1d]" },
   { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0 }, "Fild64M", "[!0r,!1d]" },
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 72cdbbd..1807d5c 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -429,6 +429,136 @@
     void GenConst128(BasicBlock* bb, MIR* mir);
 
     /*
+     * @brief MIR to move a vectorized register to another.
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is kMirConstVector.
+     * @note vA: TypeSize
+     * @note vB: destination
+     * @note vC: source
+     */
+    void GenMoveVector(BasicBlock *bb, MIR *mir);
+
+    /*
+     * @brief Packed multiply of units in two vector registers: vB = vB .* @note vC using vA to know the type of the vector.
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is kMirConstVector.
+     * @note vA: TypeSize
+     * @note vB: destination and source
+     * @note vC: source
+     */
+    void GenMultiplyVector(BasicBlock *bb, MIR *mir);
+
+    /*
+     * @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector.
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is kMirConstVector.
+     * @note vA: TypeSize
+     * @note vB: destination and source
+     * @note vC: source
+     */
+    void GenAddVector(BasicBlock *bb, MIR *mir);
+
+    /*
+     * @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector.
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is kMirConstVector.
+     * @note vA: TypeSize
+     * @note vB: destination and source
+     * @note vC: source
+     */
+    void GenSubtractVector(BasicBlock *bb, MIR *mir);
+
+    /*
+     * @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector.
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is kMirConstVector.
+     * @note vA: TypeSize
+     * @note vB: destination and source
+     * @note vC: immediate
+     */
+    void GenShiftLeftVector(BasicBlock *bb, MIR *mir);
+
+    /*
+     * @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector.
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is kMirConstVector.
+     * @note vA: TypeSize
+     * @note vB: destination and source
+     * @note vC: immediate
+     */
+    void GenSignedShiftRightVector(BasicBlock *bb, MIR *mir);
+
+    /*
+     * @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector.
+     * @param bb The basic block in which the MIR is from..
+     * @param mir The MIR whose opcode is kMirConstVector.
+     * @note vA: TypeSize
+     * @note vB: destination and source
+     * @note vC: immediate
+     */
+    void GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir);
+
+    /*
+     * @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector.
+     * @note vA: TypeSize
+     * @note vB: destination and source
+     * @note vC: source
+     */
+    void GenAndVector(BasicBlock *bb, MIR *mir);
+
+    /*
+     * @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector.
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is kMirConstVector.
+     * @note vA: TypeSize
+     * @note vB: destination and source
+     * @note vC: source
+     */
+    void GenOrVector(BasicBlock *bb, MIR *mir);
+
+    /*
+     * @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector.
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is kMirConstVector.
+     * @note vA: TypeSize
+     * @note vB: destination and source
+     * @note vC: source
+     */
+    void GenXorVector(BasicBlock *bb, MIR *mir);
+
+    /*
+     * @brief Reduce a 128-bit packed element into a single VR by taking lower bits
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is kMirConstVector.
+     * @details Instruction does a horizontal addition of the packed elements and then adds it to VR.
+     * @note vA: TypeSize
+     * @note vB: destination and source VR (not vector register)
+     * @note vC: source (vector register)
+     */
+    void GenAddReduceVector(BasicBlock *bb, MIR *mir);
+
+    /*
+     * @brief Extract a packed element into a single VR.
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is kMirConstVector.
+     * @note vA: TypeSize
+     * @note vB: destination VR (not vector register)
+     * @note vC: source (vector register)
+     * @note arg[0]: The index to use for extraction from vector register (which packed element).
+     */
+    void GenReduceVector(BasicBlock *bb, MIR *mir);
+
+    /*
+     * @brief Create a vector value, with all TypeSize values equal to vC
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is kMirConstVector.
+     * @note vA: TypeSize.
+     * @note vB: destination vector register.
+     * @note vC: source VR (not vector register).
+     */
+    void GenSetVector(BasicBlock *bb, MIR *mir);
+
+    /*
      * @brief Generate code for a vector opcode.
      * @param bb The basic block in which the MIR is from.
      * @param mir The MIR whose opcode is a non-standard opcode.
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index e7a629a..19ad2f8 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -35,6 +35,12 @@
     rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
 #endif
 };
+static const RegStorage core_regs_arr_64q[] = {
+    rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q,
+#ifdef TARGET_REX_SUPPORT
+    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
+#endif
+};
 static const RegStorage sp_regs_arr_32[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 };
@@ -55,6 +61,7 @@
 };
 static const RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
 static const RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_64};
+static const RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64};
 static const RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
 static const RegStorage core_temps_arr_64[] = {
     rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
@@ -62,6 +69,12 @@
     rs_r8, rs_r9, rs_r10, rs_r11
 #endif
 };
+static const RegStorage core_temps_arr_64q[] = {
+    rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q,
+#ifdef TARGET_REX_SUPPORT
+    rs_r8q, rs_r9q, rs_r10q, rs_r11q
+#endif
+};
 static const RegStorage sp_temps_arr_32[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 };
@@ -81,11 +94,23 @@
 #endif
 };
 
+static const RegStorage xp_temps_arr_32[] = {
+    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
+};
+static const RegStorage xp_temps_arr_64[] = {
+    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
+#ifdef TARGET_REX_SUPPORT
+    rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
+#endif
+};
+
 static const std::vector<RegStorage> empty_pool;
 static const std::vector<RegStorage> core_regs_32(core_regs_arr_32,
     core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0]));
 static const std::vector<RegStorage> core_regs_64(core_regs_arr_64,
     core_regs_arr_64 + sizeof(core_regs_arr_64) / sizeof(core_regs_arr_64[0]));
+static const std::vector<RegStorage> core_regs_64q(core_regs_arr_64q,
+    core_regs_arr_64q + sizeof(core_regs_arr_64q) / sizeof(core_regs_arr_64q[0]));
 static const std::vector<RegStorage> sp_regs_32(sp_regs_arr_32,
     sp_regs_arr_32 + sizeof(sp_regs_arr_32) / sizeof(sp_regs_arr_32[0]));
 static const std::vector<RegStorage> sp_regs_64(sp_regs_arr_64,
@@ -98,10 +123,14 @@
     reserved_regs_arr_32 + sizeof(reserved_regs_arr_32) / sizeof(reserved_regs_arr_32[0]));
 static const std::vector<RegStorage> reserved_regs_64(reserved_regs_arr_64,
     reserved_regs_arr_64 + sizeof(reserved_regs_arr_64) / sizeof(reserved_regs_arr_64[0]));
+static const std::vector<RegStorage> reserved_regs_64q(reserved_regs_arr_64q,
+    reserved_regs_arr_64q + sizeof(reserved_regs_arr_64q) / sizeof(reserved_regs_arr_64q[0]));
 static const std::vector<RegStorage> core_temps_32(core_temps_arr_32,
     core_temps_arr_32 + sizeof(core_temps_arr_32) / sizeof(core_temps_arr_32[0]));
 static const std::vector<RegStorage> core_temps_64(core_temps_arr_64,
     core_temps_arr_64 + sizeof(core_temps_arr_64) / sizeof(core_temps_arr_64[0]));
+static const std::vector<RegStorage> core_temps_64q(core_temps_arr_64q,
+    core_temps_arr_64q + sizeof(core_temps_arr_64q) / sizeof(core_temps_arr_64q[0]));
 static const std::vector<RegStorage> sp_temps_32(sp_temps_arr_32,
     sp_temps_arr_32 + sizeof(sp_temps_arr_32) / sizeof(sp_temps_arr_32[0]));
 static const std::vector<RegStorage> sp_temps_64(sp_temps_arr_64,
@@ -111,6 +140,11 @@
 static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64,
     dp_temps_arr_64 + sizeof(dp_temps_arr_64) / sizeof(dp_temps_arr_64[0]));
 
+static const std::vector<RegStorage> xp_temps_32(xp_temps_arr_32,
+    xp_temps_arr_32 + sizeof(xp_temps_arr_32) / sizeof(xp_temps_arr_32[0]));
+static const std::vector<RegStorage> xp_temps_64(xp_temps_arr_64,
+    xp_temps_arr_64 + sizeof(xp_temps_arr_64) / sizeof(xp_temps_arr_64[0]));
+
 RegStorage rs_rX86_SP;
 
 X86NativeRegisterPool rX86_ARG0;
@@ -209,7 +243,7 @@
   /* Double registers in x86 are just a single FP register */
   seed = 1;
   /* FP register starts at bit position 16 */
-  shift = reg.IsFloat() ? kX86FPReg0 : 0;
+  shift = (reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0;
   /* Expand the double register id into single offset */
   shift += reg_id;
   return (seed << shift);
@@ -531,9 +565,9 @@
 
 void X86Mir2Lir::CompilerInitializeRegAlloc() {
   if (Gen64Bit()) {
-    reg_pool_ = new (arena_) RegisterPool(this, arena_, empty_pool, core_regs_64, sp_regs_64,
-                                          dp_regs_64, empty_pool, reserved_regs_64,
-                                          empty_pool, core_temps_64, sp_temps_64, dp_temps_64);
+    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, empty_pool/*core_regs_64q*/, sp_regs_64,
+                                          dp_regs_64, reserved_regs_64, empty_pool/*reserved_regs_64q*/,
+                                          core_temps_64, empty_pool/*core_temps_64q*/, sp_temps_64, dp_temps_64);
   } else {
     reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool, sp_regs_32,
                                           dp_regs_32, reserved_regs_32, empty_pool,
@@ -542,17 +576,31 @@
 
   // Target-specific adjustments.
 
+  // Add in XMM registers.
+  const std::vector<RegStorage> *xp_temps = Gen64Bit() ? &xp_temps_64 : &xp_temps_32;
+  for (RegStorage reg : *xp_temps) {
+    RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg));
+    reginfo_map_.Put(reg.GetReg(), info);
+    info->SetIsTemp(true);
+  }
+
   // Alias single precision xmm to double xmms.
   // TODO: as needed, add larger vector sizes - alias all to the largest.
   GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
   for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
     int sp_reg_num = info->GetReg().GetRegNum();
+    RegStorage xp_reg = RegStorage::Solo128(sp_reg_num);
+    RegisterInfo* xp_reg_info = GetRegInfo(xp_reg);
+    // 128-bit xmm vector register's master storage should refer to itself.
+    DCHECK_EQ(xp_reg_info, xp_reg_info->Master());
+
+    // Redirect 32-bit vector's master storage to 128-bit vector.
+    info->SetMaster(xp_reg_info);
+
     RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
     RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
-    // 64-bit xmm vector register's master storage should refer to itself.
-    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
-    // Redirect 32-bit vector's master storage to 64-bit vector.
-    info->SetMaster(dp_reg_info);
+    // Redirect 64-bit vector's master storage to 128-bit vector.
+    dp_reg_info->SetMaster(xp_reg_info);
   }
 
   // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
@@ -1240,6 +1288,45 @@
     case kMirOpConstVector:
       GenConst128(bb, mir);
       break;
+    case kMirOpMoveVector:
+      GenMoveVector(bb, mir);
+      break;
+    case kMirOpPackedMultiply:
+      GenMultiplyVector(bb, mir);
+      break;
+    case kMirOpPackedAddition:
+      GenAddVector(bb, mir);
+      break;
+    case kMirOpPackedSubtract:
+      GenSubtractVector(bb, mir);
+      break;
+    case kMirOpPackedShiftLeft:
+      GenShiftLeftVector(bb, mir);
+      break;
+    case kMirOpPackedSignedShiftRight:
+      GenSignedShiftRightVector(bb, mir);
+      break;
+    case kMirOpPackedUnsignedShiftRight:
+      GenUnsignedShiftRightVector(bb, mir);
+      break;
+    case kMirOpPackedAnd:
+      GenAndVector(bb, mir);
+      break;
+    case kMirOpPackedOr:
+      GenOrVector(bb, mir);
+      break;
+    case kMirOpPackedXor:
+      GenXorVector(bb, mir);
+      break;
+    case kMirOpPackedAddReduce:
+      GenAddReduceVector(bb, mir);
+      break;
+    case kMirOpPackedReduce:
+      GenReduceVector(bb, mir);
+      break;
+    case kMirOpPackedSet:
+      GenSetVector(bb, mir);
+      break;
     default:
       break;
   }
@@ -1249,9 +1336,9 @@
   int type_size = mir->dalvikInsn.vA;
   // We support 128 bit vectors.
   DCHECK_EQ(type_size & 0xFFFF, 128);
-  int reg = mir->dalvikInsn.vB;
-  DCHECK_LT(reg, 8);
+  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
   uint32_t *args = mir->dalvikInsn.arg;
+  int reg = rs_dest.GetReg();
   // Check for all 0 case.
   if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) {
     NewLIR2(kX86XorpsRR, reg, reg);
@@ -1277,6 +1364,287 @@
   SetMemRefType(load, true, kLiteral);
 }
 
+void X86Mir2Lir::GenMoveVector(BasicBlock *bb, MIR *mir) {
+  // We only support 128 bit registers.
+  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
+  RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vC);
+  NewLIR2(kX86Mova128RR, rs_dest.GetReg(), rs_src.GetReg());
+}
+
+void X86Mir2Lir::GenMultiplyVector(BasicBlock *bb, MIR *mir) {
+  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+  int opcode = 0;
+  switch (opsize) {
+    case k32:
+      opcode = kX86PmulldRR;
+      break;
+    case kSignedHalf:
+      opcode = kX86PmullwRR;
+      break;
+    case kSingle:
+      opcode = kX86MulpsRR;
+      break;
+    case kDouble:
+      opcode = kX86MulpdRR;
+      break;
+    default:
+      LOG(FATAL) << "Unsupported vector multiply " << opsize;
+      break;
+  }
+  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
+}
+
+void X86Mir2Lir::GenAddVector(BasicBlock *bb, MIR *mir) {
+  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+  int opcode = 0;
+  switch (opsize) {
+    case k32:
+      opcode = kX86PadddRR;
+      break;
+    case kSignedHalf:
+    case kUnsignedHalf:
+      opcode = kX86PaddwRR;
+      break;
+    case kUnsignedByte:
+    case kSignedByte:
+      opcode = kX86PaddbRR;
+      break;
+    case kSingle:
+      opcode = kX86AddpsRR;
+      break;
+    case kDouble:
+      opcode = kX86AddpdRR;
+      break;
+    default:
+      LOG(FATAL) << "Unsupported vector addition " << opsize;
+      break;
+  }
+  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
+}
+
+void X86Mir2Lir::GenSubtractVector(BasicBlock *bb, MIR *mir) {
+  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+  int opcode = 0;
+  switch (opsize) {
+    case k32:
+      opcode = kX86PsubdRR;
+      break;
+    case kSignedHalf:
+    case kUnsignedHalf:
+      opcode = kX86PsubwRR;
+      break;
+    case kUnsignedByte:
+    case kSignedByte:
+      opcode = kX86PsubbRR;
+      break;
+    case kSingle:
+      opcode = kX86SubpsRR;
+      break;
+    case kDouble:
+      opcode = kX86SubpdRR;
+      break;
+    default:
+      LOG(FATAL) << "Unsupported vector subtraction " << opsize;
+      break;
+  }
+  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
+}
+
+void X86Mir2Lir::GenShiftLeftVector(BasicBlock *bb, MIR *mir) {
+  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+  int imm = mir->dalvikInsn.vC;
+  int opcode = 0;
+  switch (opsize) {
+    case k32:
+      opcode = kX86PslldRI;
+      break;
+    case k64:
+      opcode = kX86PsllqRI;
+      break;
+    case kSignedHalf:
+    case kUnsignedHalf:
+      opcode = kX86PsllwRI;
+      break;
+    default:
+      LOG(FATAL) << "Unsupported vector shift left " << opsize;
+      break;
+  }
+  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
+}
+
+void X86Mir2Lir::GenSignedShiftRightVector(BasicBlock *bb, MIR *mir) {
+  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+  int imm = mir->dalvikInsn.vC;
+  int opcode = 0;
+  switch (opsize) {
+    case k32:
+      opcode = kX86PsradRI;
+      break;
+    case kSignedHalf:
+    case kUnsignedHalf:
+      opcode = kX86PsrawRI;
+      break;
+    default:
+      LOG(FATAL) << "Unsupported vector signed shift right " << opsize;
+      break;
+  }
+  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
+}
+
+void X86Mir2Lir::GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir) {
+  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+  int imm = mir->dalvikInsn.vC;
+  int opcode = 0;
+  switch (opsize) {
+    case k32:
+      opcode = kX86PsrldRI;
+      break;
+    case k64:
+      opcode = kX86PsrlqRI;
+      break;
+    case kSignedHalf:
+    case kUnsignedHalf:
+      opcode = kX86PsrlwRI;
+      break;
+    default:
+      LOG(FATAL) << "Unsupported vector unsigned shift right " << opsize;
+      break;
+  }
+  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
+}
+
+void X86Mir2Lir::GenAndVector(BasicBlock *bb, MIR *mir) {
+  // We only support 128 bit registers.
+  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+  NewLIR2(kX86PandRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
+}
+
+void X86Mir2Lir::GenOrVector(BasicBlock *bb, MIR *mir) {
+  // We only support 128 bit registers.
+  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+  NewLIR2(kX86PorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
+}
+
+void X86Mir2Lir::GenXorVector(BasicBlock *bb, MIR *mir) {
+  // We only support 128 bit registers.
+  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+  NewLIR2(kX86PxorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
+}
+
+void X86Mir2Lir::GenAddReduceVector(BasicBlock *bb, MIR *mir) {
+  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+  int imm = mir->dalvikInsn.vC;
+  int opcode = 0;
+  switch (opsize) {
+    case k32:
+      opcode = kX86PhadddRR;
+      break;
+    case kSignedHalf:
+    case kUnsignedHalf:
+      opcode = kX86PhaddwRR;
+      break;
+    default:
+      LOG(FATAL) << "Unsupported vector add reduce " << opsize;
+      break;
+  }
+  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
+}
+
+void X86Mir2Lir::GenReduceVector(BasicBlock *bb, MIR *mir) {
+  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+  RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vB);
+  int index = mir->dalvikInsn.arg[0];
+  int opcode = 0;
+  switch (opsize) {
+    case k32:
+      opcode = kX86PextrdRRI;
+      break;
+    case kSignedHalf:
+    case kUnsignedHalf:
+      opcode = kX86PextrwRRI;
+      break;
+    case kUnsignedByte:
+    case kSignedByte:
+      opcode = kX86PextrbRRI;
+      break;
+    default:
+      LOG(FATAL) << "Unsupported vector reduce " << opsize;
+      break;
+  }
+  // We need to extract to a GPR.
+  RegStorage temp = AllocTemp();
+  NewLIR3(opcode, temp.GetReg(), rs_src.GetReg(), index);
+
+  // Assume that the destination VR is in the def for the mir.
+  RegLocation rl_dest = mir_graph_->GetDest(mir);
+  RegLocation rl_temp =
+    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, temp, INVALID_SREG, INVALID_SREG};
+  StoreValue(rl_dest, rl_temp);
+}
+
+void X86Mir2Lir::GenSetVector(BasicBlock *bb, MIR *mir) {
+  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
+  int op_low = 0, op_high = 0;
+  switch (opsize) {
+    case k32:
+      op_low = kX86PshufdRRI;
+      break;
+    case kSignedHalf:
+    case kUnsignedHalf:
+      // Handles low quadword.
+      op_low = kX86PshuflwRRI;
+      // Handles upper quadword.
+      op_high = kX86PshufdRRI;
+      break;
+    default:
+      LOG(FATAL) << "Unsupported vector set " << opsize;
+      break;
+  }
+
+  // Load the value from the VR into a GPR.
+  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
+  rl_src = LoadValue(rl_src, kCoreReg);
+
+  // Load the value into the XMM register.
+  NewLIR2(kX86MovdxrRR, rs_dest.GetReg(), rl_src.reg.GetReg());
+
+  // Now shuffle the value across the destination.
+  NewLIR3(op_low, rs_dest.GetReg(), rs_dest.GetReg(), 0);
+
+  // And then repeat as needed.
+  if (op_high != 0) {
+    NewLIR3(op_high, rs_dest.GetReg(), rs_dest.GetReg(), 0);
+  }
+}
+
+
 LIR *X86Mir2Lir::ScanVectorLiteral(MIR *mir) {
   int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
   for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index adfed0c..964422c 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -117,41 +117,56 @@
 // FIXME: for 64-bit, perhaps add an X86_64NativeRegisterPool enum?
 enum X86NativeRegisterPool {
   r0             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0,
+  r0q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 0,
   rAX            = r0,
   r1             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 1,
+  r1q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 1,
   rCX            = r1,
   r2             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 2,
+  r2q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 2,
   rDX            = r2,
   r3             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 3,
+  r3q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 3,
   rBX            = r3,
   r4sp_32        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4,
   rX86_SP_32     = r4sp_32,
   r4sp_64        = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 4,
   rX86_SP_64     = r4sp_64,
   r5             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 5,
+  r5q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 5,
   rBP            = r5,
   r5sib_no_base  = r5,
   r6             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 6,
+  r6q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 6,
   rSI            = r6,
   r7             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 7,
+  r7q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 7,
   rDI            = r7,
 #ifndef TARGET_REX_SUPPORT
   // fake return address register for core spill mask.
   rRET           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
 #else
   r8             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
+  r8q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 8,
   r9             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 9,
+  r9q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 9,
   r10            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10,
+  r10q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 10,
   r11            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11,
+  r11q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 11,
   r12            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12,
+  r12q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 12,
   r13            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13,
+  r13q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 13,
   r14            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14,
+  r14q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 14,
   r15            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15,
+  r15q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 15,
   // fake return address register for core spill mask.
   rRET           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 16,
 #endif
 
-  // xmm registers, single precision view
+  // xmm registers, single precision view.
   fr0  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0,
   fr1  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 1,
   fr2  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 2,
@@ -161,7 +176,7 @@
   fr6  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6,
   fr7  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7,
 
-  // xmm registers, double precision alises
+  // xmm registers, double precision aliases.
   dr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0,
   dr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 1,
   dr2  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 2,
@@ -171,37 +186,62 @@
   dr6  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6,
   dr7  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7,
 
-  // xmm registers, quad precision alises
-  qr0  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 0,
-  qr1  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 1,
-  qr2  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 2,
-  qr3  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 3,
-  qr4  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 4,
-  qr5  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 5,
-  qr6  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 6,
-  qr7  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 7,
+  // xmm registers aliases.
+  xr0  = RegStorage::k128BitSolo | 0,
+  xr1  = RegStorage::k128BitSolo | 1,
+  xr2  = RegStorage::k128BitSolo | 2,
+  xr3  = RegStorage::k128BitSolo | 3,
+  xr4  = RegStorage::k128BitSolo | 4,
+  xr5  = RegStorage::k128BitSolo | 5,
+  xr6  = RegStorage::k128BitSolo | 6,
+  xr7  = RegStorage::k128BitSolo | 7,
 
   // TODO: as needed, add 256, 512 and 1024-bit xmm views.
 };
 
 constexpr RegStorage rs_r0(RegStorage::kValid | r0);
+constexpr RegStorage rs_r0q(RegStorage::kValid | r0q);
 constexpr RegStorage rs_rAX = rs_r0;
 constexpr RegStorage rs_r1(RegStorage::kValid | r1);
+constexpr RegStorage rs_r1q(RegStorage::kValid | r1q);
 constexpr RegStorage rs_rCX = rs_r1;
 constexpr RegStorage rs_r2(RegStorage::kValid | r2);
+constexpr RegStorage rs_r2q(RegStorage::kValid | r2q);
 constexpr RegStorage rs_rDX = rs_r2;
 constexpr RegStorage rs_r3(RegStorage::kValid | r3);
+constexpr RegStorage rs_r3q(RegStorage::kValid | r3q);
 constexpr RegStorage rs_rBX = rs_r3;
 constexpr RegStorage rs_rX86_SP_64(RegStorage::kValid | r4sp_64);
 constexpr RegStorage rs_rX86_SP_32(RegStorage::kValid | r4sp_32);
 extern RegStorage rs_rX86_SP;
 constexpr RegStorage rs_r5(RegStorage::kValid | r5);
+constexpr RegStorage rs_r5q(RegStorage::kValid | r5q);
 constexpr RegStorage rs_rBP = rs_r5;
 constexpr RegStorage rs_r6(RegStorage::kValid | r6);
+constexpr RegStorage rs_r6q(RegStorage::kValid | r6q);
 constexpr RegStorage rs_rSI = rs_r6;
 constexpr RegStorage rs_r7(RegStorage::kValid | r7);
+constexpr RegStorage rs_r7q(RegStorage::kValid | r7q);
 constexpr RegStorage rs_rDI = rs_r7;
 constexpr RegStorage rs_rRET(RegStorage::kValid | rRET);
+#ifdef TARGET_REX_SUPPORT
+constexpr RegStorage rs_r8(RegStorage::kValid | r8);
+constexpr RegStorage rs_r8q(RegStorage::kValid | r8q);
+constexpr RegStorage rs_r9(RegStorage::kValid | r9);
+constexpr RegStorage rs_r9q(RegStorage::kValid | r9q);
+constexpr RegStorage rs_r10(RegStorage::kValid | r10);
+constexpr RegStorage rs_r10q(RegStorage::kValid | r10q);
+constexpr RegStorage rs_r11(RegStorage::kValid | r11);
+constexpr RegStorage rs_r11q(RegStorage::kValid | r11q);
+constexpr RegStorage rs_r12(RegStorage::kValid | r12);
+constexpr RegStorage rs_r12q(RegStorage::kValid | r12q);
+constexpr RegStorage rs_r13(RegStorage::kValid | r13);
+constexpr RegStorage rs_r13q(RegStorage::kValid | r13q);
+constexpr RegStorage rs_r14(RegStorage::kValid | r14);
+constexpr RegStorage rs_r14q(RegStorage::kValid | r14q);
+constexpr RegStorage rs_r15(RegStorage::kValid | r15);
+constexpr RegStorage rs_r15q(RegStorage::kValid | r15q);
+#endif
 
 constexpr RegStorage rs_fr0(RegStorage::kValid | fr0);
 constexpr RegStorage rs_fr1(RegStorage::kValid | fr1);
@@ -221,14 +261,14 @@
 constexpr RegStorage rs_dr6(RegStorage::kValid | dr6);
 constexpr RegStorage rs_dr7(RegStorage::kValid | dr7);
 
-constexpr RegStorage rs_qr0(RegStorage::kValid | qr0);
-constexpr RegStorage rs_qr1(RegStorage::kValid | qr1);
-constexpr RegStorage rs_qr2(RegStorage::kValid | qr2);
-constexpr RegStorage rs_qr3(RegStorage::kValid | qr3);
-constexpr RegStorage rs_qr4(RegStorage::kValid | qr4);
-constexpr RegStorage rs_qr5(RegStorage::kValid | qr5);
-constexpr RegStorage rs_qr6(RegStorage::kValid | qr6);
-constexpr RegStorage rs_qr7(RegStorage::kValid | qr7);
+constexpr RegStorage rs_xr0(RegStorage::kValid | xr0);
+constexpr RegStorage rs_xr1(RegStorage::kValid | xr1);
+constexpr RegStorage rs_xr2(RegStorage::kValid | xr2);
+constexpr RegStorage rs_xr3(RegStorage::kValid | xr3);
+constexpr RegStorage rs_xr4(RegStorage::kValid | xr4);
+constexpr RegStorage rs_xr5(RegStorage::kValid | xr5);
+constexpr RegStorage rs_xr6(RegStorage::kValid | xr6);
+constexpr RegStorage rs_xr7(RegStorage::kValid | xr7);
 
 extern X86NativeRegisterPool rX86_ARG0;
 extern X86NativeRegisterPool rX86_ARG1;
@@ -418,9 +458,39 @@
   Binary0fOpCode(kX86Divsd),    // double divide
   Binary0fOpCode(kX86Divss),    // float divide
   Binary0fOpCode(kX86Punpckldq),  // Interleave low-order double words
-  kX86PsrlqRI,                  // right shift of floating point registers
-  kX86PsllqRI,                  // left shift of floating point registers
-  kX86SqrtsdRR,                 // sqrt of floating point register
+  Binary0fOpCode(kX86Sqrtsd),   // square root
+  Binary0fOpCode(kX86Pmulld),   // parallel integer multiply 32 bits x 4
+  Binary0fOpCode(kX86Pmullw),   // parallel integer multiply 16 bits x 8
+  Binary0fOpCode(kX86Mulps),    // parallel FP multiply 32 bits x 4
+  Binary0fOpCode(kX86Mulpd),    // parallel FP multiply 64 bits x 2
+  Binary0fOpCode(kX86Paddb),    // parallel integer addition 8 bits x 16
+  Binary0fOpCode(kX86Paddw),    // parallel integer addition 16 bits x 8
+  Binary0fOpCode(kX86Paddd),    // parallel integer addition 32 bits x 4
+  Binary0fOpCode(kX86Addps),    // parallel FP addition 32 bits x 4
+  Binary0fOpCode(kX86Addpd),    // parallel FP addition 64 bits x 2
+  Binary0fOpCode(kX86Psubb),    // parallel integer subtraction 8 bits x 16
+  Binary0fOpCode(kX86Psubw),    // parallel integer subtraction 16 bits x 8
+  Binary0fOpCode(kX86Psubd),    // parallel integer subtraction 32 bits x 4
+  Binary0fOpCode(kX86Subps),    // parallel FP subtraction 32 bits x 4
+  Binary0fOpCode(kX86Subpd),    // parallel FP subtraction 64 bits x 2
+  Binary0fOpCode(kX86Pand),     // parallel AND 128 bits x 1
+  Binary0fOpCode(kX86Por),      // parallel OR 128 bits x 1
+  Binary0fOpCode(kX86Pxor),     // parallel XOR 128 bits x 1
+  Binary0fOpCode(kX86Phaddw),   // parallel horizontal addition 16 bits x 8
+  Binary0fOpCode(kX86Phaddd),   // parallel horizontal addition 32 bits x 4
+  kX86PextrbRRI,                // Extract 8 bits from XMM into GPR
+  kX86PextrwRRI,                // Extract 16 bits from XMM into GPR
+  kX86PextrdRRI,                // Extract 32 bits from XMM into GPR
+  kX86PshuflwRRI,               // Shuffle 16 bits in lower 64 bits of XMM.
+  kX86PshufdRRI,                // Shuffle 32 bits in XMM.
+  kX86PsrawRI,                  // signed right shift of floating point registers 16 bits x 8
+  kX86PsradRI,                  // signed right shift of floating point registers 32 bits x 4
+  kX86PsrlwRI,                  // logical right shift of floating point registers 16 bits x 8
+  kX86PsrldRI,                  // logical right shift of floating point registers 32 bits x 4
+  kX86PsrlqRI,                  // logical right shift of floating point registers 64 bits x 2
+  kX86PsllwRI,                  // left shift of floating point registers 16 bits x 8
+  kX86PslldRI,                  // left shift of floating point registers 32 bits x 4
+  kX86PsllqRI,                  // left shift of floating point registers 64 bits x 2
   kX86Fild32M,                  // push 32-bit integer on x87 stack
   kX86Fild64M,                  // push 64-bit integer on x87 stack
   kX86Fstp32M,                  // pop top x87 fp stack and do 32-bit store
diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h
index df21343..2f7e701 100644
--- a/compiler/dex/reg_storage.h
+++ b/compiler/dex/reg_storage.h
@@ -280,6 +280,11 @@
     return RegStorage(k32BitSolo, (reg_num & kRegNumMask) | kFloatingPoint);
   }
 
+  // Create a 128-bit solo.
+  static RegStorage Solo128(int reg_num) {
+    return RegStorage(k128BitSolo, reg_num & kRegTypeMask);
+  }
+
   // Create a 64-bit solo.
   static RegStorage Solo64(int reg_num) {
     return RegStorage(k64BitSolo, reg_num & kRegTypeMask);
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 08fd386..45abfcc 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -42,8 +42,8 @@
 }
 
 inline mirror::Class* CompilerDriver::ResolveCompilingMethodsClass(
-    ScopedObjectAccess& soa, const Handle<mirror::DexCache>& dex_cache,
-    const Handle<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit) {
+    ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit) {
   DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
   DCHECK_EQ(class_loader.Get(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
   const DexFile::MethodId& referrer_method_id =
@@ -59,8 +59,8 @@
 }
 
 inline mirror::ArtField* CompilerDriver::ResolveField(
-    ScopedObjectAccess& soa, const Handle<mirror::DexCache>& dex_cache,
-    const Handle<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit,
+    ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
     uint32_t field_idx, bool is_static) {
   DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
   DCHECK_EQ(class_loader.Get(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
@@ -165,13 +165,14 @@
 }
 
 inline mirror::ArtMethod* CompilerDriver::ResolveMethod(
-    ScopedObjectAccess& soa, const Handle<mirror::DexCache>& dex_cache,
-    const Handle<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit,
+    ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
     uint32_t method_idx, InvokeType invoke_type) {
-  DCHECK(dex_cache->GetDexFile() == mUnit->GetDexFile());
-  DCHECK(class_loader.Get() == soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
+  DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
+  DCHECK_EQ(class_loader.Get(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
   mirror::ArtMethod* resolved_method = mUnit->GetClassLinker()->ResolveMethod(
-      *mUnit->GetDexFile(), method_idx, dex_cache, class_loader, nullptr, invoke_type);
+      *mUnit->GetDexFile(), method_idx, dex_cache, class_loader, NullHandle<mirror::ArtMethod>(),
+      invoke_type);
   DCHECK_EQ(resolved_method == nullptr, soa.Self()->IsExceptionPending());
   if (UNLIKELY(resolved_method == nullptr)) {
     // Clean up any exception left by type resolution.
@@ -206,8 +207,8 @@
 }
 
 inline int CompilerDriver::IsFastInvoke(
-    ScopedObjectAccess& soa, const Handle<mirror::DexCache>& dex_cache,
-    const Handle<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit,
+    ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
     mirror::Class* referrer_class, mirror::ArtMethod* resolved_method, InvokeType* invoke_type,
     MethodReference* target_method, const MethodReference* devirt_target,
     uintptr_t* direct_code, uintptr_t* direct_method) {
@@ -256,15 +257,17 @@
     ClassLinker* class_linker = mUnit->GetClassLinker();
     if (LIKELY(devirt_target->dex_file == mUnit->GetDexFile())) {
       called_method = class_linker->ResolveMethod(*devirt_target->dex_file,
-                                                  devirt_target->dex_method_index,
-                                                  dex_cache, class_loader, NULL, kVirtual);
+                                                  devirt_target->dex_method_index, dex_cache,
+                                                  class_loader, NullHandle<mirror::ArtMethod>(),
+                                                  kVirtual);
     } else {
       StackHandleScope<1> hs(soa.Self());
       Handle<mirror::DexCache> target_dex_cache(
           hs.NewHandle(class_linker->FindDexCache(*devirt_target->dex_file)));
       called_method = class_linker->ResolveMethod(*devirt_target->dex_file,
                                                   devirt_target->dex_method_index,
-                                                  target_dex_cache, class_loader, NULL, kVirtual);
+                                                  target_dex_cache, class_loader,
+                                                  NullHandle<mirror::ArtMethod>(), kVirtual);
     }
     CHECK(called_method != NULL);
     CHECK(!called_method->IsAbstract());
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 0f41d2b..3304561 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -511,7 +511,7 @@
 }
 
 static DexToDexCompilationLevel GetDexToDexCompilationlevel(
-    Thread* self, Handle<mirror::ClassLoader>& class_loader, const DexFile& dex_file,
+    Thread* self, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file,
     const DexFile::ClassDef& class_def) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   const char* descriptor = dex_file.GetClassDescriptor(class_def);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -731,11 +731,11 @@
     for (const std::pair<uint16_t, const DexFile*>& exception_type : unresolved_exception_types) {
       uint16_t exception_type_idx = exception_type.first;
       const DexFile* dex_file = exception_type.second;
-      StackHandleScope<3> hs(self);
+      StackHandleScope<2> hs(self);
       Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(*dex_file)));
-      auto class_loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
       Handle<mirror::Class> klass(hs.NewHandle(
-          class_linker->ResolveType(*dex_file, exception_type_idx, dex_cache, class_loader)));
+          class_linker->ResolveType(*dex_file, exception_type_idx, dex_cache,
+                                    NullHandle<mirror::ClassLoader>())));
       if (klass.Get() == NULL) {
         const DexFile::TypeId& type_id = dex_file->GetTypeId(exception_type_idx);
         const char* descriptor = dex_file->GetTypeDescriptor(type_id);
@@ -1541,7 +1541,8 @@
       if (resolve_fields_and_methods) {
         while (it.HasNextDirectMethod()) {
           mirror::ArtMethod* method = class_linker->ResolveMethod(dex_file, it.GetMemberIndex(),
-                                                                  dex_cache, class_loader, NULL,
+                                                                  dex_cache, class_loader,
+                                                                  NullHandle<mirror::ArtMethod>(),
                                                                   it.GetMethodInvokeType(class_def));
           if (method == NULL) {
             CHECK(soa.Self()->IsExceptionPending());
@@ -1551,7 +1552,8 @@
         }
         while (it.HasNextVirtualMethod()) {
           mirror::ArtMethod* method = class_linker->ResolveMethod(dex_file, it.GetMemberIndex(),
-                                                                  dex_cache, class_loader, NULL,
+                                                                  dex_cache, class_loader,
+                                                                  NullHandle<mirror::ArtMethod>(),
                                                                   it.GetMethodInvokeType(class_def));
           if (method == NULL) {
             CHECK(soa.Self()->IsExceptionPending());
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index abca659..14ccb50 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -221,15 +221,15 @@
 
   // Resolve compiling method's class. Returns nullptr on failure.
   mirror::Class* ResolveCompilingMethodsClass(
-      ScopedObjectAccess& soa, const Handle<mirror::DexCache>& dex_cache,
-      const Handle<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit)
+      ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+      Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a field. Returns nullptr on failure, including incompatible class change.
   // NOTE: Unlike ClassLinker's ResolveField(), this method enforces is_static.
   mirror::ArtField* ResolveField(
-      ScopedObjectAccess& soa, const Handle<mirror::DexCache>& dex_cache,
-      const Handle<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit,
+      ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+      Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
       uint32_t field_idx, bool is_static)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -258,8 +258,8 @@
 
   // Resolve a method. Returns nullptr on failure, including incompatible class change.
   mirror::ArtMethod* ResolveMethod(
-      ScopedObjectAccess& soa, const Handle<mirror::DexCache>& dex_cache,
-      const Handle<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit,
+      ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+      Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
       uint32_t method_idx, InvokeType invoke_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -277,8 +277,8 @@
   // Can we fast-path an INVOKE? If no, returns 0. If yes, returns a non-zero opaque flags value
   // for ProcessedInvoke() and computes the necessary lowering info.
   int IsFastInvoke(
-      ScopedObjectAccess& soa, const Handle<mirror::DexCache>& dex_cache,
-      const Handle<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit,
+      ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+      Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
       mirror::Class* referrer_class, mirror::ArtMethod* resolved_method, InvokeType* invoke_type,
       MethodReference* target_method, const MethodReference* devirt_target,
       uintptr_t* direct_code, uintptr_t* direct_method)
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 4efd27d..964dfeb 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -152,10 +152,9 @@
   jobject class_loader;
   {
     ScopedObjectAccess soa(Thread::Current());
-    StackHandleScope<1> hs(soa.Self());
-    auto null_loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
-    CompileVirtualMethod(null_loader, "java.lang.Class", "isFinalizable", "()Z");
-    CompileDirectMethod(null_loader, "java.lang.Object", "<init>", "()V");
+    CompileVirtualMethod(NullHandle<mirror::ClassLoader>(), "java.lang.Class", "isFinalizable",
+                         "()Z");
+    CompileDirectMethod(NullHandle<mirror::ClassLoader>(), "java.lang.Object", "<init>", "()V");
     class_loader = LoadDex("AbstractMethod");
   }
   ASSERT_TRUE(class_loader != NULL);
diff --git a/compiler/elf_writer_mclinker.cc b/compiler/elf_writer_mclinker.cc
index 0e27210..3dba426 100644
--- a/compiler/elf_writer_mclinker.cc
+++ b/compiler/elf_writer_mclinker.cc
@@ -361,10 +361,11 @@
       ClassLinker* linker = Runtime::Current()->GetClassLinker();
       // Unchecked as we hold mutator_lock_ on entry.
       ScopedObjectAccessUnchecked soa(Thread::Current());
-      StackHandleScope<2> hs(soa.Self());
+      StackHandleScope<1> hs(soa.Self());
       Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(dex_file)));
-      auto class_loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
-      method = linker->ResolveMethod(dex_file, method_idx, dex_cache, class_loader, NULL, invoke_type);
+      method = linker->ResolveMethod(dex_file, method_idx, dex_cache,
+                                     NullHandle<mirror::ClassLoader>(),
+                                     NullHandle<mirror::ArtMethod>(), invoke_type);
       CHECK(method != NULL);
     }
     const CompiledMethod* compiled_method =
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 70144c8..be53926 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -695,15 +695,14 @@
 static ArtMethod* GetTargetMethod(const CompilerDriver::CallPatchInformation* patch)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  StackHandleScope<2> hs(Thread::Current());
+  StackHandleScope<1> hs(Thread::Current());
   Handle<mirror::DexCache> dex_cache(
       hs.NewHandle(class_linker->FindDexCache(*patch->GetTargetDexFile())));
-  auto class_loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
   ArtMethod* method = class_linker->ResolveMethod(*patch->GetTargetDexFile(),
                                                   patch->GetTargetMethodIdx(),
                                                   dex_cache,
-                                                  class_loader,
-                                                  NULL,
+                                                  NullHandle<mirror::ClassLoader>(),
+                                                  NullHandle<mirror::ArtMethod>(),
                                                   patch->GetTargetInvokeType());
   CHECK(method != NULL)
     << patch->GetTargetDexFile()->GetLocation() << " " << patch->GetTargetMethodIdx();
@@ -721,11 +720,8 @@
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(patch->GetDexFile())));
-  auto class_loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
-  Class* klass = class_linker->ResolveType(patch->GetDexFile(),
-                                           patch->GetTargetTypeIdx(),
-                                           dex_cache,
-                                           class_loader);
+  Class* klass = class_linker->ResolveType(patch->GetDexFile(), patch->GetTargetTypeIdx(),
+                                           dex_cache, NullHandle<mirror::ClassLoader>());
   CHECK(klass != NULL)
     << patch->GetDexFile().GetLocation() << " " << patch->GetTargetTypeIdx();
   CHECK(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx()) == klass)
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index a7ee82e..6812f3c 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -154,8 +154,8 @@
     }
     const char* descriptor = dex_file->GetClassDescriptor(class_def);
     StackHandleScope<1> hs(soa.Self());
-    auto loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
-    mirror::Class* klass = class_linker->FindClass(soa.Self(), descriptor, loader);
+    mirror::Class* klass = class_linker->FindClass(soa.Self(), descriptor,
+                                                   NullHandle<mirror::ClassLoader>());
 
     const OatFile::OatClass oat_class = oat_dex_file->GetOatClass(i);
     CHECK_EQ(mirror::Class::Status::kStatusNotReady, oat_class.GetStatus()) << descriptor;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index bace25c..5d532ab 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -513,9 +513,10 @@
     ScopedObjectAccessUnchecked soa(Thread::Current());
     StackHandleScope<2> hs(soa.Self());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(*dex_file_)));
-    auto class_loader = hs.NewHandle<mirror::ClassLoader>(nullptr);
     mirror::ArtMethod* method = linker->ResolveMethod(*dex_file_, it.GetMemberIndex(), dex_cache,
-                                                      class_loader, nullptr, invoke_type);
+                                                      NullHandle<mirror::ClassLoader>(),
+                                                      NullHandle<mirror::ArtMethod>(),
+                                                      invoke_type);
     CHECK(method != NULL);
     // Portable code offsets are set by ElfWriterMclinker::FixupCompiledCodeOffset after linking.
     method->SetQuickOatCodeOffset(offsets.code_offset_);
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 5cc6acf..cba4ebf 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -363,10 +363,49 @@
         src_reg_file = dst_reg_file = SSE;
         break;
       case 0x38:  // 3 byte extended opcode
-        opcode << StringPrintf("unknown opcode '0F 38 %02X'", *instr);
+        instr++;
+        if (prefix[2] == 0x66) {
+          switch (*instr) {
+            case 0x40:
+              opcode << "pmulld";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            default:
+              opcode << StringPrintf("unknown opcode '0F 38 %02X'", *instr);
+          }
+        } else {
+          opcode << StringPrintf("unknown opcode '0F 38 %02X'", *instr);
+        }
         break;
       case 0x3A:  // 3 byte extended opcode
-        opcode << StringPrintf("unknown opcode '0F 3A %02X'", *instr);
+        instr++;
+        if (prefix[2] == 0x66) {
+          switch (*instr) {
+            case 0x14:
+              opcode << "pextrb";
+              prefix[2] = 0;
+              has_modrm = true;
+              store = true;
+              dst_reg_file = SSE;
+              immediate_bytes = 1;
+              break;
+            case 0x16:
+              opcode << "pextrd";
+              prefix[2] = 0;
+              has_modrm = true;
+              store = true;
+              dst_reg_file = SSE;
+              immediate_bytes = 1;
+              break;
+            default:
+              opcode << StringPrintf("unknown opcode '0F 3A %02X'", *instr);
+          }
+        } else {
+          opcode << StringPrintf("unknown opcode '0F 3A %02X'", *instr);
+        }
         break;
       case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
       case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F:
@@ -467,11 +506,11 @@
         break;
       case 0x6F:
         if (prefix[2] == 0x66) {
-          dst_reg_file = SSE;
+          src_reg_file = dst_reg_file = SSE;
           opcode << "movdqa";
           prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
         } else if (prefix[0] == 0xF3) {
-          dst_reg_file = SSE;
+          src_reg_file = dst_reg_file = SSE;
           opcode << "movdqu";
           prefix[0] = 0;  // clear prefix now it's served its purpose as part of the opcode
         } else {
@@ -481,6 +520,25 @@
         load = true;
         has_modrm = true;
         break;
+      case 0x70:
+        if (prefix[2] == 0x66) {
+          opcode << "pshufd";
+          prefix[2] = 0;
+          has_modrm = true;
+          store = true;
+          src_reg_file = dst_reg_file = SSE;
+          immediate_bytes = 1;
+        } else if (prefix[0] == 0xF2) {
+          opcode << "pshuflw";
+          prefix[0] = 0;
+          has_modrm = true;
+          store = true;
+          src_reg_file = dst_reg_file = SSE;
+          immediate_bytes = 1;
+        } else {
+          opcode << StringPrintf("unknown opcode '0F %02X'", *instr);
+        }
+        break;
       case 0x71:
         if (prefix[2] == 0x66) {
           dst_reg_file = SSE;
@@ -603,6 +661,18 @@
       case 0xB7: opcode << "movzxw"; has_modrm = true; load = true; break;
       case 0xBE: opcode << "movsxb"; has_modrm = true; load = true; break;
       case 0xBF: opcode << "movsxw"; has_modrm = true; load = true; break;
+      case 0xC5:
+        if (prefix[2] == 0x66) {
+          opcode << "pextrw";
+          prefix[2] = 0;
+          has_modrm = true;
+          store = true;
+          src_reg_file = dst_reg_file = SSE;
+          immediate_bytes = 1;
+        } else {
+          opcode << StringPrintf("unknown opcode '0F %02X'", *instr);
+        }
+        break;
       case 0xC7:
         static const char* x0FxC7_opcodes[] = { "unknown-0f-c7", "cmpxchg8b", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7" };
         modrm_opcodes = x0FxC7_opcodes;
@@ -614,6 +684,125 @@
         opcode << "bswap";
         reg_in_opcode = true;
         break;
+      case 0xDB:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        opcode << "pand";
+        prefix[2] = 0;
+        has_modrm = true;
+        load = true;
+        break;
+      case 0xD5:
+        if (prefix[2] == 0x66) {
+          opcode << "pmullw";
+          prefix[2] = 0;
+          has_modrm = true;
+          load = true;
+          src_reg_file = dst_reg_file = SSE;
+        } else {
+          opcode << StringPrintf("unknown opcode '0F %02X'", *instr);
+        }
+        break;
+      case 0xEB:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        opcode << "por";
+        prefix[2] = 0;
+        has_modrm = true;
+        load = true;
+        break;
+      case 0xEF:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        opcode << "pxor";
+        prefix[2] = 0;
+        has_modrm = true;
+        load = true;
+        break;
+      case 0xF8:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        opcode << "psubb";
+        prefix[2] = 0;
+        has_modrm = true;
+        load = true;
+        break;
+      case 0xF9:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        opcode << "psubw";
+        prefix[2] = 0;
+        has_modrm = true;
+        load = true;
+        break;
+      case 0xFA:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        opcode << "psubd";
+        prefix[2] = 0;
+        has_modrm = true;
+        load = true;
+        break;
+      case 0xFC:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        opcode << "paddb";
+        prefix[2] = 0;
+        has_modrm = true;
+        load = true;
+        break;
+      case 0xFD:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        opcode << "paddw";
+        prefix[2] = 0;
+        has_modrm = true;
+        load = true;
+        break;
+      case 0xFE:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        opcode << "paddd";
+        prefix[2] = 0;
+        has_modrm = true;
+        load = true;
+        break;
       default:
         opcode << StringPrintf("unknown opcode '0F %02X'", *instr);
         break;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 5dee1af..7c76b3c 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -424,10 +424,10 @@
       Runtime* runtime = Runtime::Current();
       if (runtime != nullptr) {
         ScopedObjectAccess soa(Thread::Current());
-        StackHandleScope<2> hs(soa.Self());
+        StackHandleScope<1> hs(soa.Self());
         Handle<mirror::DexCache> dex_cache(
             hs.NewHandle(runtime->GetClassLinker()->FindDexCache(dex_file)));
-        auto class_loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
+        NullHandle<mirror::ClassLoader> class_loader;
         verifier::MethodVerifier verifier(&dex_file, &dex_cache, &class_loader, &class_def,
                                           code_item, dex_method_idx, nullptr, method_access_flags,
                                           true, true, true);
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 95fcd73..96e0afd 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -298,7 +298,11 @@
     l.s     $f29, 116($a1)
     l.s     $f30, 120($a1)
     l.s     $f31, 124($a1)
+    .set push
+    .set nomacro
+    .set noat
     lw      $at, 4($a0)
+    .set pop
     lw      $v0, 8($a0)
     lw      $v1, 12($a0)
     lw      $a1, 20($a0)
@@ -322,8 +326,6 @@
     lw      $s7, 92($a0)
     lw      $t8, 96($a0)
     lw      $t9, 100($a0)
-    lw      $k0, 104($a0)
-    lw      $k1, 108($a0)
     lw      $gp, 112($a0)
     lw      $sp, 116($a0)
     lw      $fp, 120($a0)
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index ac86014..84afb2d 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -34,9 +34,7 @@
 }
 
 inline mirror::Class* ClassLinker::FindSystemClass(Thread* self, const char* descriptor) {
-  StackHandleScope<1> hs(self);
-  auto class_loader = hs.NewHandle<mirror::ClassLoader>(nullptr);
-  return FindClass(self, descriptor, class_loader);
+  return FindClass(self, descriptor, NullHandle<mirror::ClassLoader>());
 }
 
 inline mirror::Class* ClassLinker::FindArrayClass(Thread* self, mirror::Class* element_class) {
@@ -110,31 +108,47 @@
   return resolved_type;
 }
 
-inline mirror::ArtMethod* ClassLinker::ResolveMethod(uint32_t method_idx,
-                                                     mirror::ArtMethod* referrer,
-                                                     InvokeType type) {
+inline mirror::ArtMethod* ClassLinker::GetResolvedMethod(uint32_t method_idx,
+                                                         mirror::ArtMethod* referrer,
+                                                         InvokeType type) {
   mirror::ArtMethod* resolved_method =
       referrer->GetDexCacheResolvedMethods()->Get(method_idx);
-  if (UNLIKELY(resolved_method == NULL || resolved_method->IsRuntimeMethod())) {
-    mirror::Class* declaring_class = referrer->GetDeclaringClass();
-    StackHandleScope<2> hs(Thread::Current());
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
-    Handle<mirror::ClassLoader> class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
-    const DexFile& dex_file = *dex_cache->GetDexFile();
-    resolved_method = ResolveMethod(dex_file, method_idx, dex_cache, class_loader, referrer, type);
-    if (resolved_method != nullptr) {
-      DCHECK_EQ(dex_cache->GetResolvedMethod(method_idx), resolved_method);
-    }
+  if (resolved_method == nullptr || resolved_method->IsRuntimeMethod()) {
+    return nullptr;
   }
   return resolved_method;
 }
 
-inline mirror::ArtField* ClassLinker::ResolveField(uint32_t field_idx,
-                                                   mirror::ArtMethod* referrer,
+inline mirror::ArtMethod* ClassLinker::ResolveMethod(Thread* self, uint32_t method_idx,
+                                                     mirror::ArtMethod** referrer,
+                                                     InvokeType type) {
+  mirror::ArtMethod* resolved_method = GetResolvedMethod(method_idx, *referrer, type);
+  if (LIKELY(resolved_method != nullptr)) {
+    return resolved_method;
+  }
+  mirror::Class* declaring_class = (*referrer)->GetDeclaringClass();
+  StackHandleScope<3> hs(self);
+  Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
+  Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
+  HandleWrapper<mirror::ArtMethod> h_referrer(hs.NewHandleWrapper(referrer));
+  const DexFile* dex_file = h_dex_cache->GetDexFile();
+  resolved_method = ResolveMethod(*dex_file, method_idx, h_dex_cache, h_class_loader, h_referrer,
+                                  type);
+  if (resolved_method != nullptr) {
+    DCHECK_EQ(h_dex_cache->GetResolvedMethod(method_idx), resolved_method);
+  }
+  return resolved_method;
+}
+
+inline mirror::ArtField* ClassLinker::GetResolvedField(uint32_t field_idx,
+                                                       mirror::Class* field_declaring_class) {
+  return field_declaring_class->GetDexCache()->GetResolvedField(field_idx);
+}
+
+inline mirror::ArtField* ClassLinker::ResolveField(uint32_t field_idx, mirror::ArtMethod* referrer,
                                                    bool is_static) {
   mirror::Class* declaring_class = referrer->GetDeclaringClass();
-  mirror::ArtField* resolved_field =
-      declaring_class->GetDexCache()->GetResolvedField(field_idx);
+  mirror::ArtField* resolved_field = GetResolvedField(field_idx, declaring_class);
   if (UNLIKELY(resolved_field == NULL)) {
     StackHandleScope<2> hs(Thread::Current());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index c7302b5..afff7a2 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1369,7 +1369,7 @@
 }
 
 mirror::Class* ClassLinker::FindClass(Thread* self, const char* descriptor,
-                                      const Handle<mirror::ClassLoader>& class_loader) {
+                                      Handle<mirror::ClassLoader> class_loader) {
   DCHECK_NE(*descriptor, '\0') << "descriptor is empty string";
   DCHECK(self != nullptr);
   self->AssertNoPendingException();
@@ -1390,8 +1390,7 @@
     DexFile::ClassPathEntry pair = DexFile::FindInClassPath(descriptor, boot_class_path_);
     if (pair.second != NULL) {
       StackHandleScope<1> hs(self);
-      auto class_loader = hs.NewHandle<mirror::ClassLoader>(nullptr);
-      return DefineClass(descriptor, class_loader, *pair.first, *pair.second);
+      return DefineClass(descriptor, NullHandle<mirror::ClassLoader>(), *pair.first, *pair.second);
     }
   } else if (Runtime::Current()->UseCompileTimeClassPath()) {
     // First try the boot class path, we check the descriptor first to avoid an unnecessary
@@ -1452,7 +1451,7 @@
 }
 
 mirror::Class* ClassLinker::DefineClass(const char* descriptor,
-                                        const Handle<mirror::ClassLoader>& class_loader,
+                                        Handle<mirror::ClassLoader> class_loader,
                                         const DexFile& dex_file,
                                         const DexFile::ClassDef& dex_class_def) {
   Thread* self = Thread::Current();
@@ -1796,10 +1795,9 @@
   // Ignore virtual methods on the iterator.
 }
 
-void ClassLinker::LinkCode(const Handle<mirror::ArtMethod>& method,
-                           const OatFile::OatClass* oat_class,
+void ClassLinker::LinkCode(Handle<mirror::ArtMethod> method, const OatFile::OatClass* oat_class,
                            const DexFile& dex_file, uint32_t dex_method_index,
-                           uint32_t method_index) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+                           uint32_t method_index) {
   // Method shouldn't have already been linked.
   DCHECK(method->GetEntryPointFromQuickCompiledCode() == nullptr);
   DCHECK(method->GetEntryPointFromPortableCompiledCode() == nullptr);
@@ -1871,7 +1869,7 @@
 
 void ClassLinker::LoadClass(const DexFile& dex_file,
                             const DexFile::ClassDef& dex_class_def,
-                            const Handle<mirror::Class>& klass,
+                            Handle<mirror::Class> klass,
                             mirror::ClassLoader* class_loader) {
   CHECK(klass.Get() != NULL);
   CHECK(klass->GetDexCache() != NULL);
@@ -1909,7 +1907,7 @@
 
 void ClassLinker::LoadClassMembers(const DexFile& dex_file,
                                    const byte* class_data,
-                                   const Handle<mirror::Class>& klass,
+                                   Handle<mirror::Class> klass,
                                    mirror::ClassLoader* class_loader,
                                    const OatFile::OatClass* oat_class) {
   // Load fields.
@@ -2007,8 +2005,7 @@
 }
 
 void ClassLinker::LoadField(const DexFile& /*dex_file*/, const ClassDataItemIterator& it,
-                            const Handle<mirror::Class>& klass,
-                            const Handle<mirror::ArtField>& dst) {
+                            Handle<mirror::Class> klass, Handle<mirror::ArtField> dst) {
   uint32_t field_idx = it.GetMemberIndex();
   dst->SetDexFieldIndex(field_idx);
   dst->SetDeclaringClass(klass.Get());
@@ -2017,7 +2014,7 @@
 
 mirror::ArtMethod* ClassLinker::LoadMethod(Thread* self, const DexFile& dex_file,
                                            const ClassDataItemIterator& it,
-                                           const Handle<mirror::Class>& klass) {
+                                           Handle<mirror::Class> klass) {
   uint32_t dex_method_idx = it.GetMemberIndex();
   const DexFile::MethodId& method_id = dex_file.GetMethodId(dex_method_idx);
   const char* method_name = dex_file.StringDataByIdx(method_id.name_idx_);
@@ -2088,7 +2085,7 @@
 }
 
 void ClassLinker::AppendToBootClassPath(const DexFile& dex_file,
-                                        const Handle<mirror::DexCache>& dex_cache) {
+                                        Handle<mirror::DexCache> dex_cache) {
   CHECK(dex_cache.Get() != NULL) << dex_file.GetLocation();
   boot_class_path_.push_back(&dex_file);
   RegisterDexFile(dex_file, dex_cache);
@@ -2110,7 +2107,7 @@
 }
 
 void ClassLinker::RegisterDexFileLocked(const DexFile& dex_file,
-                                        const Handle<mirror::DexCache>& dex_cache) {
+                                        Handle<mirror::DexCache> dex_cache) {
   dex_lock_.AssertExclusiveHeld(Thread::Current());
   CHECK(dex_cache.Get() != NULL) << dex_file.GetLocation();
   CHECK(dex_cache->GetLocation()->Equals(dex_file.GetLocation()))
@@ -2147,7 +2144,7 @@
 }
 
 void ClassLinker::RegisterDexFile(const DexFile& dex_file,
-                                  const Handle<mirror::DexCache>& dex_cache) {
+                                  Handle<mirror::DexCache> dex_cache) {
   WriterMutexLock mu(Thread::Current(), dex_lock_);
   RegisterDexFileLocked(dex_file, dex_cache);
 }
@@ -2224,7 +2221,7 @@
 //
 // Returns NULL with an exception raised on failure.
 mirror::Class* ClassLinker::CreateArrayClass(Thread* self, const char* descriptor,
-                                             const Handle<mirror::ClassLoader>& class_loader) {
+                                             Handle<mirror::ClassLoader> class_loader) {
   // Identify the underlying component type
   CHECK_EQ('[', descriptor[0]);
   StackHandleScope<2> hs(self);
@@ -2416,7 +2413,7 @@
        it != end && it->first == hash;
        ++it) {
     mirror::Class* klass = it->second;
-    if (klass->GetClassLoader() == class_loader && descriptor == klass->GetDescriptor()) {
+    if (klass->GetClassLoader() == class_loader && klass->DescriptorEquals(descriptor)) {
       class_table_.erase(it);
       return true;
     }
@@ -2460,13 +2457,13 @@
   auto end = class_table_.end();
   for (auto it = class_table_.lower_bound(hash); it != end && it->first == hash; ++it) {
     mirror::Class* klass = it->second;
-    if (klass->GetClassLoader() == class_loader && descriptor == klass->GetDescriptor()) {
+    if (klass->GetClassLoader() == class_loader && klass->DescriptorEquals(descriptor)) {
       if (kIsDebugBuild) {
         // Check for duplicates in the table.
         for (++it; it != end && it->first == hash; ++it) {
           mirror::Class* klass2 = it->second;
-          CHECK(!((klass2->GetClassLoader() == class_loader) &&
-              descriptor == klass2->GetDescriptor()))
+          CHECK(!(klass2->GetClassLoader() == class_loader &&
+              klass2->DescriptorEquals(descriptor)))
               << PrettyClass(klass) << " " << klass << " " << klass->GetClassLoader() << " "
               << PrettyClass(klass2) << " " << klass2 << " " << klass2->GetClassLoader();
         }
@@ -2557,13 +2554,13 @@
   for (auto it = class_table_.lower_bound(hash), end = class_table_.end();
       it != end && it->first == hash; ++it) {
     mirror::Class* klass = it->second;
-    if (descriptor == klass->GetDescriptor()) {
+    if (klass->DescriptorEquals(descriptor)) {
       result.push_back(klass);
     }
   }
 }
 
-void ClassLinker::VerifyClass(const Handle<mirror::Class>& klass) {
+void ClassLinker::VerifyClass(Handle<mirror::Class> klass) {
   // TODO: assert that the monitor on the Class is held
   Thread* self = Thread::Current();
   ObjectLock<mirror::Class> lock(self, klass);
@@ -2776,7 +2773,7 @@
 }
 
 void ClassLinker::ResolveClassExceptionHandlerTypes(const DexFile& dex_file,
-                                                    const Handle<mirror::Class>& klass) {
+                                                    Handle<mirror::Class> klass) {
   for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
     ResolveMethodExceptionHandlerTypes(dex_file, klass->GetDirectMethod(i));
   }
@@ -2817,7 +2814,7 @@
 
 static void CheckProxyConstructor(mirror::ArtMethod* constructor);
 static void CheckProxyMethod(mirror::ArtMethod* method,
-                             Handle<mirror::ArtMethod>& prototype);
+                             Handle<mirror::ArtMethod> prototype);
 
 mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccessAlreadyRunnable& soa, jstring name,
                                              jobjectArray interfaces, jobject loader,
@@ -2999,7 +2996,7 @@
 
 
 mirror::ArtMethod* ClassLinker::CreateProxyConstructor(Thread* self,
-                                                       const Handle<mirror::Class>& klass,
+                                                       Handle<mirror::Class> klass,
                                                        mirror::Class* proxy_class) {
   // Create constructor for Proxy that must initialize h
   mirror::ObjectArray<mirror::ArtMethod>* proxy_direct_methods =
@@ -3030,8 +3027,8 @@
 }
 
 mirror::ArtMethod* ClassLinker::CreateProxyMethod(Thread* self,
-                                                  const Handle<mirror::Class>& klass,
-                                                  const Handle<mirror::ArtMethod>& prototype) {
+                                                  Handle<mirror::Class> klass,
+                                                  Handle<mirror::ArtMethod> prototype) {
   // Ensure prototype is in dex cache so that we can use the dex cache to look up the overridden
   // prototype method
   prototype->GetDeclaringClass()->GetDexCache()->SetResolvedMethod(prototype->GetDexMethodIndex(),
@@ -3058,8 +3055,7 @@
   return method;
 }
 
-static void CheckProxyMethod(mirror::ArtMethod* method,
-                             Handle<mirror::ArtMethod>& prototype)
+static void CheckProxyMethod(mirror::ArtMethod* method, Handle<mirror::ArtMethod> prototype)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Basic sanity
   CHECK(!prototype->IsFinal());
@@ -3119,7 +3115,7 @@
   return init_done_;
 }
 
-bool ClassLinker::InitializeClass(const Handle<mirror::Class>& klass, bool can_init_statics,
+bool ClassLinker::InitializeClass(Handle<mirror::Class> klass, bool can_init_statics,
                                   bool can_init_parents) {
   // see JLS 3rd edition, 12.4.2 "Detailed Initialization Procedure" for the locking protocol
 
@@ -3286,7 +3282,7 @@
   return success;
 }
 
-bool ClassLinker::WaitForInitializeClass(const Handle<mirror::Class>& klass, Thread* self,
+bool ClassLinker::WaitForInitializeClass(Handle<mirror::Class> klass, Thread* self,
                                          ObjectLock<mirror::Class>& lock)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   while (true) {
@@ -3326,7 +3322,7 @@
   LOG(FATAL) << "Not Reached" << PrettyClass(klass.Get());
 }
 
-bool ClassLinker::ValidateSuperClassDescriptors(const Handle<mirror::Class>& klass) {
+bool ClassLinker::ValidateSuperClassDescriptors(Handle<mirror::Class> klass) {
   if (klass->IsInterface()) {
     return true;
   }
@@ -3368,18 +3364,12 @@
   return true;
 }
 
-bool ClassLinker::EnsureInitialized(const Handle<mirror::Class>& c, bool can_init_fields,
+bool ClassLinker::EnsureInitialized(Handle<mirror::Class> c, bool can_init_fields,
                                     bool can_init_parents) {
-  DCHECK(c.Get() != NULL);
-  if (c->IsInitialized()) {
-    return true;
-  }
-
-  bool success = InitializeClass(c, can_init_fields, can_init_parents);
-  if (!success) {
-    if (can_init_fields && can_init_parents) {
-      CHECK(Thread::Current()->IsExceptionPending()) << PrettyClass(c.Get());
-    }
+  DCHECK(c.Get() != nullptr);
+  const bool success = c->IsInitialized() || InitializeClass(c, can_init_fields, can_init_parents);
+  if (!success && can_init_fields && can_init_parents) {
+    CHECK(Thread::Current()->IsExceptionPending()) << PrettyClass(c.Get());
   }
   return success;
 }
@@ -3398,8 +3388,8 @@
   }
 }
 
-bool ClassLinker::LinkClass(Thread* self, const Handle<mirror::Class>& klass,
-                            const Handle<mirror::ObjectArray<mirror::Class>>& interfaces) {
+bool ClassLinker::LinkClass(Thread* self, Handle<mirror::Class> klass,
+                            Handle<mirror::ObjectArray<mirror::Class>> interfaces) {
   CHECK_EQ(mirror::Class::kStatusLoaded, klass->GetStatus());
   if (!LinkSuperClass(klass)) {
     return false;
@@ -3420,8 +3410,7 @@
   return true;
 }
 
-bool ClassLinker::LoadSuperAndInterfaces(const Handle<mirror::Class>& klass,
-                                         const DexFile& dex_file) {
+bool ClassLinker::LoadSuperAndInterfaces(Handle<mirror::Class> klass, const DexFile& dex_file) {
   CHECK_EQ(mirror::Class::kStatusIdx, klass->GetStatus());
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(klass->GetDexClassDefIndex());
   uint16_t super_class_idx = class_def.superclass_idx_;
@@ -3464,7 +3453,7 @@
   return true;
 }
 
-bool ClassLinker::LinkSuperClass(const Handle<mirror::Class>& klass) {
+bool ClassLinker::LinkSuperClass(Handle<mirror::Class> klass) {
   CHECK(!klass->IsPrimitive());
   mirror::Class* super = klass->GetSuperClass();
   if (klass.Get() == GetClassRoot(kJavaLangObject)) {
@@ -3524,8 +3513,8 @@
 }
 
 // Populate the class vtable and itable. Compute return type indices.
-bool ClassLinker::LinkMethods(const Handle<mirror::Class>& klass,
-                              const Handle<mirror::ObjectArray<mirror::Class>>& interfaces) {
+bool ClassLinker::LinkMethods(Handle<mirror::Class> klass,
+                              Handle<mirror::ObjectArray<mirror::Class>> interfaces) {
   if (klass->IsInterface()) {
     // No vtable.
     size_t count = klass->NumVirtualMethods();
@@ -3545,7 +3534,7 @@
   return true;
 }
 
-bool ClassLinker::LinkVirtualMethods(const Handle<mirror::Class>& klass) {
+bool ClassLinker::LinkVirtualMethods(Handle<mirror::Class> klass) {
   Thread* self = Thread::Current();
   if (klass->HasSuperClass()) {
     uint32_t max_count = (klass->NumVirtualMethods() +
@@ -3632,9 +3621,8 @@
   return true;
 }
 
-bool ClassLinker::LinkInterfaceMethods(
-    const Handle<mirror::Class>& klass,
-    const Handle<mirror::ObjectArray<mirror::Class>>& interfaces) {
+bool ClassLinker::LinkInterfaceMethods(Handle<mirror::Class> klass,
+                                       Handle<mirror::ObjectArray<mirror::Class>> interfaces) {
   Thread* const self = Thread::Current();
   // Set the imt table to be all conflicts by default.
   klass->SetImTable(Runtime::Current()->GetDefaultImt());
@@ -3889,12 +3877,12 @@
   return true;
 }
 
-bool ClassLinker::LinkInstanceFields(const Handle<mirror::Class>& klass) {
+bool ClassLinker::LinkInstanceFields(Handle<mirror::Class> klass) {
   CHECK(klass.Get() != NULL);
   return LinkFields(klass, false);
 }
 
-bool ClassLinker::LinkStaticFields(const Handle<mirror::Class>& klass) {
+bool ClassLinker::LinkStaticFields(Handle<mirror::Class> klass) {
   CHECK(klass.Get() != NULL);
   size_t allocated_class_size = klass->GetClassSize();
   bool success = LinkFields(klass, true);
@@ -3933,7 +3921,7 @@
   }
 };
 
-bool ClassLinker::LinkFields(const Handle<mirror::Class>& klass, bool is_static) {
+bool ClassLinker::LinkFields(Handle<mirror::Class> klass, bool is_static) {
   size_t num_fields =
       is_static ? klass->NumStaticFields() : klass->NumInstanceFields();
 
@@ -4029,7 +4017,7 @@
   }
 
   // We lie to the GC about the java.lang.ref.Reference.referent field, so it doesn't scan it.
-  if (!is_static && "Ljava/lang/ref/Reference;" == klass->GetDescriptor()) {
+  if (!is_static && klass->DescriptorEquals("Ljava/lang/ref/Reference;")) {
     // We know there are no non-reference fields in the Reference classes, and we know
     // that 'referent' is alphabetically last, so this is easy...
     CHECK_EQ(num_reference_fields, num_fields) << PrettyClass(klass.Get());
@@ -4054,7 +4042,7 @@
       FieldHelper fh(field);
       Primitive::Type type = fh.GetTypeAsPrimitiveType();
       bool is_primitive = type != Primitive::kPrimNot;
-      if ("Ljava/lang/ref/Reference;" == klass->GetDescriptor() &&
+      if (klass->DescriptorEquals("Ljava/lang/ref/Reference;") &&
           strcmp("referent", fh.GetName()) == 0) {
         is_primitive = true;  // We lied above, so we have to expect a lie here.
       }
@@ -4093,7 +4081,7 @@
 
 //  Set the bitmap of reference offsets, refOffsets, from the ifields
 //  list.
-void ClassLinker::CreateReferenceInstanceOffsets(const Handle<mirror::Class>& klass) {
+void ClassLinker::CreateReferenceInstanceOffsets(Handle<mirror::Class> klass) {
   uint32_t reference_offsets = 0;
   mirror::Class* super_class = klass->GetSuperClass();
   if (super_class != NULL) {
@@ -4107,11 +4095,11 @@
   CreateReferenceOffsets(klass, false, reference_offsets);
 }
 
-void ClassLinker::CreateReferenceStaticOffsets(const Handle<mirror::Class>& klass) {
+void ClassLinker::CreateReferenceStaticOffsets(Handle<mirror::Class> klass) {
   CreateReferenceOffsets(klass, true, 0);
 }
 
-void ClassLinker::CreateReferenceOffsets(const Handle<mirror::Class>& klass, bool is_static,
+void ClassLinker::CreateReferenceOffsets(Handle<mirror::Class> klass, bool is_static,
                                          uint32_t reference_offsets) {
   size_t num_reference_fields =
       is_static ? klass->NumReferenceStaticFieldsDuringLinking()
@@ -4144,7 +4132,7 @@
 }
 
 mirror::String* ClassLinker::ResolveString(const DexFile& dex_file, uint32_t string_idx,
-                                           const Handle<mirror::DexCache>& dex_cache) {
+                                           Handle<mirror::DexCache> dex_cache) {
   DCHECK(dex_cache.Get() != nullptr);
   mirror::String* resolved = dex_cache->GetResolvedString(string_idx);
   if (resolved != NULL) {
@@ -4166,8 +4154,8 @@
 }
 
 mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file, uint16_t type_idx,
-                                        const Handle<mirror::DexCache>& dex_cache,
-                                        const Handle<mirror::ClassLoader>& class_loader) {
+                                        Handle<mirror::DexCache> dex_cache,
+                                        Handle<mirror::ClassLoader> class_loader) {
   DCHECK(dex_cache.Get() != NULL);
   mirror::Class* resolved = dex_cache->GetResolvedType(type_idx);
   if (resolved == NULL) {
@@ -4198,16 +4186,15 @@
   return resolved;
 }
 
-mirror::ArtMethod* ClassLinker::ResolveMethod(const DexFile& dex_file,
-                                              uint32_t method_idx,
-                                              const Handle<mirror::DexCache>& dex_cache,
-                                              const Handle<mirror::ClassLoader>& class_loader,
-                                              mirror::ArtMethod* referrer,
+mirror::ArtMethod* ClassLinker::ResolveMethod(const DexFile& dex_file, uint32_t method_idx,
+                                              Handle<mirror::DexCache> dex_cache,
+                                              Handle<mirror::ClassLoader> class_loader,
+                                              Handle<mirror::ArtMethod> referrer,
                                               InvokeType type) {
   DCHECK(dex_cache.Get() != NULL);
   // Check for hit in the dex cache.
   mirror::ArtMethod* resolved = dex_cache->GetResolvedMethod(method_idx);
-  if (resolved != NULL && !resolved->IsRuntimeMethod()) {
+  if (resolved != nullptr && !resolved->IsRuntimeMethod()) {
     return resolved;
   }
   // Fail, get the declaring class.
@@ -4282,7 +4269,7 @@
     }
 
     // If we found something, check that it can be accessed by the referrer.
-    if (resolved != NULL && referrer != NULL) {
+    if (resolved != NULL && referrer.Get() != NULL) {
       mirror::Class* methods_class = resolved->GetDeclaringClass();
       mirror::Class* referring_class = referrer->GetDeclaringClass();
       if (!referring_class->CanAccess(methods_class)) {
@@ -4302,11 +4289,11 @@
       case kDirect:
       case kStatic:
         if (resolved != NULL) {
-          ThrowIncompatibleClassChangeError(type, kVirtual, resolved, referrer);
+          ThrowIncompatibleClassChangeError(type, kVirtual, resolved, referrer.Get());
         } else {
           resolved = klass->FindInterfaceMethod(name, signature);
           if (resolved != NULL) {
-            ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer);
+            ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer.Get());
           } else {
             ThrowNoSuchMethodError(type, klass, name, signature);
           }
@@ -4314,11 +4301,11 @@
         break;
       case kInterface:
         if (resolved != NULL) {
-          ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer);
+          ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer.Get());
         } else {
           resolved = klass->FindVirtualMethod(name, signature);
           if (resolved != NULL) {
-            ThrowIncompatibleClassChangeError(type, kVirtual, resolved, referrer);
+            ThrowIncompatibleClassChangeError(type, kVirtual, resolved, referrer.Get());
           } else {
             ThrowNoSuchMethodError(type, klass, name, signature);
           }
@@ -4329,11 +4316,11 @@
         break;
       case kVirtual:
         if (resolved != NULL) {
-          ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer);
+          ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer.Get());
         } else {
           resolved = klass->FindInterfaceMethod(name, signature);
           if (resolved != NULL) {
-            ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer);
+            ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer.Get());
           } else {
             ThrowNoSuchMethodError(type, klass, name, signature);
           }
@@ -4346,8 +4333,8 @@
 }
 
 mirror::ArtField* ClassLinker::ResolveField(const DexFile& dex_file, uint32_t field_idx,
-                                            const Handle<mirror::DexCache>& dex_cache,
-                                            const Handle<mirror::ClassLoader>& class_loader,
+                                            Handle<mirror::DexCache> dex_cache,
+                                            Handle<mirror::ClassLoader> class_loader,
                                             bool is_static) {
   DCHECK(dex_cache.Get() != nullptr);
   mirror::ArtField* resolved = dex_cache->GetResolvedField(field_idx);
@@ -4389,8 +4376,8 @@
 
 mirror::ArtField* ClassLinker::ResolveFieldJLS(const DexFile& dex_file,
                                                uint32_t field_idx,
-                                               const Handle<mirror::DexCache>& dex_cache,
-                                               const Handle<mirror::ClassLoader>& class_loader) {
+                                               Handle<mirror::DexCache> dex_cache,
+                                               Handle<mirror::ClassLoader> class_loader) {
   DCHECK(dex_cache.Get() != nullptr);
   mirror::ArtField* resolved = dex_cache->GetResolvedField(field_idx);
   if (resolved != NULL) {
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 54805be..a8271ed 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -75,7 +75,7 @@
   // Finds a class by its descriptor, loading it if necessary.
   // If class_loader is null, searches boot_class_path_.
   mirror::Class* FindClass(Thread* self, const char* descriptor,
-                           const Handle<mirror::ClassLoader>& class_loader)
+                           Handle<mirror::ClassLoader> class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Finds a class by its descriptor using the "system" class loader, ie by searching the
@@ -92,7 +92,7 @@
 
   // Define a new a class based on a ClassDef from a DexFile
   mirror::Class* DefineClass(const char* descriptor,
-                             const Handle<mirror::ClassLoader>& class_loader,
+                             Handle<mirror::ClassLoader> class_loader,
                              const DexFile& dex_file, const DexFile::ClassDef& dex_class_def)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -136,7 +136,7 @@
   // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache.
   mirror::String* ResolveString(const DexFile& dex_file, uint32_t string_idx,
-                                const Handle<mirror::DexCache>& dex_cache)
+                                Handle<mirror::DexCache> dex_cache)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a Type with the given index from the DexFile, storing the
@@ -159,8 +159,8 @@
   // type, since it may be referenced from but not contained within
   // the given DexFile.
   mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx,
-                             const Handle<mirror::DexCache>& dex_cache,
-                             const Handle<mirror::ClassLoader>& class_loader)
+                             Handle<mirror::DexCache> dex_cache,
+                             Handle<mirror::ClassLoader> class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a method with a given ID from the DexFile, storing the
@@ -170,16 +170,21 @@
   // virtual method.
   mirror::ArtMethod* ResolveMethod(const DexFile& dex_file,
                                    uint32_t method_idx,
-                                   const Handle<mirror::DexCache>& dex_cache,
-                                   const Handle<mirror::ClassLoader>& class_loader,
-                                   mirror::ArtMethod* referrer,
+                                   Handle<mirror::DexCache> dex_cache,
+                                   Handle<mirror::ClassLoader> class_loader,
+                                   Handle<mirror::ArtMethod> referrer,
                                    InvokeType type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::ArtMethod* ResolveMethod(uint32_t method_idx, mirror::ArtMethod* referrer,
+  mirror::ArtMethod* GetResolvedMethod(uint32_t method_idx, mirror::ArtMethod* referrer,
+                                       InvokeType type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::ArtMethod* ResolveMethod(Thread* self, uint32_t method_idx, mirror::ArtMethod** referrer,
                                    InvokeType type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  mirror::ArtField* GetResolvedField(uint32_t field_idx, mirror::Class* field_declaring_class)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::ArtField* ResolveField(uint32_t field_idx, mirror::ArtMethod* referrer,
                                  bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -191,8 +196,8 @@
   // field.
   mirror::ArtField* ResolveField(const DexFile& dex_file,
                                  uint32_t field_idx,
-                                 const Handle<mirror::DexCache>& dex_cache,
-                                 const Handle<mirror::ClassLoader>& class_loader,
+                                 Handle<mirror::DexCache> dex_cache,
+                                 Handle<mirror::ClassLoader> class_loader,
                                  bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -200,10 +205,9 @@
   // result in DexCache. The ClassLinker and ClassLoader are used as
   // in ResolveType. No is_static argument is provided so that Java
   // field resolution semantics are followed.
-  mirror::ArtField* ResolveFieldJLS(const DexFile& dex_file,
-                                    uint32_t field_idx,
-                                    const Handle<mirror::DexCache>& dex_cache,
-                                    const Handle<mirror::ClassLoader>& class_loader)
+  mirror::ArtField* ResolveFieldJLS(const DexFile& dex_file, uint32_t field_idx,
+                                    Handle<mirror::DexCache> dex_cache,
+                                    Handle<mirror::ClassLoader> class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get shorty from method index without resolution. Used to do handlerization.
@@ -213,8 +217,7 @@
   // Returns true on success, false if there's an exception pending.
   // can_run_clinit=false allows the compiler to attempt to init a class,
   // given the restriction that no <clinit> execution is possible.
-  bool EnsureInitialized(const Handle<mirror::Class>& c,
-                         bool can_init_fields, bool can_init_parents)
+  bool EnsureInitialized(Handle<mirror::Class> c, bool can_init_fields, bool can_init_parents)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Initializes classes that have instances in the image but that have
@@ -224,7 +227,7 @@
   void RegisterDexFile(const DexFile& dex_file)
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void RegisterDexFile(const DexFile& dex_file, const Handle<mirror::DexCache>& dex_cache)
+  void RegisterDexFile(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -316,12 +319,12 @@
                                                                               size_t length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VerifyClass(const Handle<mirror::Class>& klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VerifyClass(Handle<mirror::Class> klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool VerifyClassUsingOatFile(const DexFile& dex_file, mirror::Class* klass,
                                mirror::Class::Status& oat_file_class_status)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ResolveClassExceptionHandlerTypes(const DexFile& dex_file,
-                                         const Handle<mirror::Class>& klass)
+                                         Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ResolveMethodExceptionHandlerTypes(const DexFile& dex_file, mirror::ArtMethod* klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -420,12 +423,12 @@
 
 
   mirror::Class* CreateArrayClass(Thread* self, const char* descriptor,
-                                  const Handle<mirror::ClassLoader>& class_loader)
+                                  Handle<mirror::ClassLoader> class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void AppendToBootClassPath(const DexFile& dex_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void AppendToBootClassPath(const DexFile& dex_file, const Handle<mirror::DexCache>& dex_cache)
+  void AppendToBootClassPath(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ConstructFieldMap(const DexFile& dex_file, const DexFile::ClassDef& dex_class_def,
@@ -437,23 +440,23 @@
 
   void LoadClass(const DexFile& dex_file,
                  const DexFile::ClassDef& dex_class_def,
-                 const Handle<mirror::Class>& klass,
+                 Handle<mirror::Class> klass,
                  mirror::ClassLoader* class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void LoadClassMembers(const DexFile& dex_file,
                         const byte* class_data,
-                        const Handle<mirror::Class>& klass,
+                        Handle<mirror::Class> klass,
                         mirror::ClassLoader* class_loader,
                         const OatFile::OatClass* oat_class)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void LoadField(const DexFile& dex_file, const ClassDataItemIterator& it,
-                 const Handle<mirror::Class>& klass, const Handle<mirror::ArtField>& dst)
+                 Handle<mirror::Class> klass, Handle<mirror::ArtField> dst)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::ArtMethod* LoadMethod(Thread* self, const DexFile& dex_file,
                                 const ClassDataItemIterator& dex_method,
-                                const Handle<mirror::Class>& klass)
+                                Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void FixupStaticTrampolines(mirror::Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -462,23 +465,23 @@
   OatFile::OatClass GetOatClass(const DexFile& dex_file, uint16_t class_def_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void RegisterDexFileLocked(const DexFile& dex_file, const Handle<mirror::DexCache>& dex_cache)
+  void RegisterDexFileLocked(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
       EXCLUSIVE_LOCKS_REQUIRED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsDexFileRegisteredLocked(const DexFile& dex_file) const
       SHARED_LOCKS_REQUIRED(dex_lock_, Locks::mutator_lock_);
 
-  bool InitializeClass(const Handle<mirror::Class>& klass, bool can_run_clinit,
+  bool InitializeClass(Handle<mirror::Class> klass, bool can_run_clinit,
                        bool can_init_parents)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool WaitForInitializeClass(const Handle<mirror::Class>& klass, Thread* self,
+  bool WaitForInitializeClass(Handle<mirror::Class> klass, Thread* self,
                               ObjectLock<mirror::Class>& lock);
-  bool ValidateSuperClassDescriptors(const Handle<mirror::Class>& klass)
+  bool ValidateSuperClassDescriptors(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsSameDescriptorInDifferentClassContexts(Thread* self, const char* descriptor,
-                                                Handle<mirror::ClassLoader>& class_loader1,
-                                                Handle<mirror::ClassLoader>& class_loader2)
+                                                Handle<mirror::ClassLoader> class_loader1,
+                                                Handle<mirror::ClassLoader> class_loader2)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsSameMethodSignatureInDifferentClassContexts(Thread* self, mirror::ArtMethod* method,
@@ -486,43 +489,43 @@
                                                      mirror::Class* klass2)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkClass(Thread* self, const Handle<mirror::Class>& klass,
-                 const Handle<mirror::ObjectArray<mirror::Class>>& interfaces)
+  bool LinkClass(Thread* self, Handle<mirror::Class> klass,
+                 Handle<mirror::ObjectArray<mirror::Class>> interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkSuperClass(const Handle<mirror::Class>& klass)
+  bool LinkSuperClass(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LoadSuperAndInterfaces(const Handle<mirror::Class>& klass, const DexFile& dex_file)
+  bool LoadSuperAndInterfaces(Handle<mirror::Class> klass, const DexFile& dex_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkMethods(const Handle<mirror::Class>& klass,
-                   const Handle<mirror::ObjectArray<mirror::Class>>& interfaces)
+  bool LinkMethods(Handle<mirror::Class> klass,
+                   Handle<mirror::ObjectArray<mirror::Class>> interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkVirtualMethods(const Handle<mirror::Class>& klass)
+  bool LinkVirtualMethods(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkInterfaceMethods(const Handle<mirror::Class>& klass,
-                            const Handle<mirror::ObjectArray<mirror::Class>>& interfaces)
+  bool LinkInterfaceMethods(Handle<mirror::Class> klass,
+                            Handle<mirror::ObjectArray<mirror::Class>> interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkStaticFields(const Handle<mirror::Class>& klass)
+  bool LinkStaticFields(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool LinkInstanceFields(const Handle<mirror::Class>& klass)
+  bool LinkInstanceFields(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool LinkFields(const Handle<mirror::Class>& klass, bool is_static)
+  bool LinkFields(Handle<mirror::Class> klass, bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void LinkCode(const Handle<mirror::ArtMethod>& method, const OatFile::OatClass* oat_class,
+  void LinkCode(Handle<mirror::ArtMethod> method, const OatFile::OatClass* oat_class,
                 const DexFile& dex_file, uint32_t dex_method_index, uint32_t method_index)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 
-  void CreateReferenceInstanceOffsets(const Handle<mirror::Class>& klass)
+  void CreateReferenceInstanceOffsets(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void CreateReferenceStaticOffsets(const Handle<mirror::Class>& klass)
+  void CreateReferenceStaticOffsets(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void CreateReferenceOffsets(const Handle<mirror::Class>& klass, bool is_static,
+  void CreateReferenceOffsets(Handle<mirror::Class> klass, bool is_static,
                               uint32_t reference_offsets)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -551,11 +554,11 @@
                                                  bool* open_failed)
       LOCKS_EXCLUDED(dex_lock_);
 
-  mirror::ArtMethod* CreateProxyConstructor(Thread* self, const Handle<mirror::Class>& klass,
+  mirror::ArtMethod* CreateProxyConstructor(Thread* self, Handle<mirror::Class> klass,
                                             mirror::Class* proxy_class)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::ArtMethod* CreateProxyMethod(Thread* self, const Handle<mirror::Class>& klass,
-                                       const Handle<mirror::ArtMethod>& prototype)
+  mirror::ArtMethod* CreateProxyMethod(Thread* self, Handle<mirror::Class> klass,
+                                       Handle<mirror::ArtMethod> prototype)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   std::vector<const DexFile*> boot_class_path_;
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index d04f02b..c11aecc 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -108,7 +108,7 @@
     AssertArrayClass(array_descriptor, array);
   }
 
-  void AssertArrayClass(const std::string& array_descriptor, const Handle<mirror::Class>& array)
+  void AssertArrayClass(const std::string& array_descriptor, Handle<mirror::Class> array)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ASSERT_TRUE(array.Get() != NULL);
     ASSERT_TRUE(array->GetClass() != NULL);
@@ -178,7 +178,7 @@
     EXPECT_TRUE(fh.GetType() != NULL);
   }
 
-  void AssertClass(const std::string& descriptor, const Handle<mirror::Class>& klass)
+  void AssertClass(const std::string& descriptor, Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     EXPECT_STREQ(descriptor.c_str(), klass->GetDescriptor().c_str());
     if (descriptor == "Ljava/lang/Object;") {
@@ -846,8 +846,7 @@
   // Validate that the "value" field is always the 0th field in each of java.lang's box classes.
   // This lets UnboxPrimitive avoid searching for the field by name at runtime.
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<1> hs(soa.Self());
-  auto class_loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
+  NullHandle<mirror::ClassLoader> class_loader;
   mirror::Class* c;
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Boolean;", class_loader);
   FieldHelper fh(c->GetIFields()->Get(0));
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index f1795a5..58b4286 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -385,31 +385,36 @@
 
 template<InvokeType type, bool access_check>
 static inline mirror::ArtMethod* FindMethodFromCode(uint32_t method_idx,
-                                                    mirror::Object* this_object,
-                                                    mirror::ArtMethod* referrer, Thread* self) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  StackHandleScope<1> hs(self);
-  Handle<mirror::Object> handle_scope_this(hs.NewHandle(type == kStatic ? nullptr : this_object));
-  mirror::ArtMethod* resolved_method = class_linker->ResolveMethod(method_idx, referrer, type);
+                                                    mirror::Object** this_object,
+                                                    mirror::ArtMethod** referrer, Thread* self) {
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  mirror::ArtMethod* resolved_method = class_linker->GetResolvedMethod(method_idx, *referrer, type);
+  if (resolved_method == nullptr) {
+    StackHandleScope<1> hs(self);
+    mirror::Object* null_this = nullptr;
+    HandleWrapper<mirror::Object> h_this(
+        hs.NewHandleWrapper(type == kStatic ? &null_this : this_object));
+    resolved_method = class_linker->ResolveMethod(self, method_idx, referrer, type);
+  }
   if (UNLIKELY(resolved_method == nullptr)) {
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
     return nullptr;  // Failure.
-  } else if (UNLIKELY(handle_scope_this.Get() == nullptr && type != kStatic)) {
+  } else if (UNLIKELY(*this_object == nullptr && type != kStatic)) {
     // Maintain interpreter-like semantics where NullPointerException is thrown
     // after potential NoSuchMethodError from class linker.
     ThrowLocation throw_location = self->GetCurrentLocationForThrow();
-    DCHECK(referrer == throw_location.GetMethod());
+    DCHECK_EQ(*referrer, throw_location.GetMethod());
     ThrowNullPointerExceptionForMethodAccess(throw_location, method_idx, type);
     return nullptr;  // Failure.
   } else if (access_check) {
     // Incompatible class change should have been handled in resolve method.
     if (UNLIKELY(resolved_method->CheckIncompatibleClassChange(type))) {
       ThrowIncompatibleClassChangeError(type, resolved_method->GetInvokeType(), resolved_method,
-                                        referrer);
+                                        *referrer);
       return nullptr;  // Failure.
     }
     mirror::Class* methods_class = resolved_method->GetDeclaringClass();
-    mirror::Class* referring_class = referrer->GetDeclaringClass();
+    mirror::Class* referring_class = (*referrer)->GetDeclaringClass();
     bool can_access_resolved_method =
         referring_class->CheckResolvedMethodAccess<type>(methods_class, resolved_method,
                                                          method_idx);
@@ -423,7 +428,7 @@
     case kDirect:
       return resolved_method;
     case kVirtual: {
-      mirror::ObjectArray<mirror::ArtMethod>* vtable = handle_scope_this->GetClass()->GetVTable();
+      mirror::ObjectArray<mirror::ArtMethod>* vtable = (*this_object)->GetClass()->GetVTable();
       uint16_t vtable_index = resolved_method->GetMethodIndex();
       if (access_check &&
           (vtable == nullptr || vtable_index >= static_cast<uint32_t>(vtable->GetLength()))) {
@@ -437,7 +442,7 @@
       return vtable->GetWithoutChecks(vtable_index);
     }
     case kSuper: {
-      mirror::Class* super_class = referrer->GetDeclaringClass()->GetSuperClass();
+      mirror::Class* super_class = (*referrer)->GetDeclaringClass()->GetSuperClass();
       uint16_t vtable_index = resolved_method->GetMethodIndex();
       mirror::ObjectArray<mirror::ArtMethod>* vtable;
       if (access_check) {
@@ -460,20 +465,19 @@
     }
     case kInterface: {
       uint32_t imt_index = resolved_method->GetDexMethodIndex() % ClassLinker::kImtSize;
-      mirror::ObjectArray<mirror::ArtMethod>* imt_table = handle_scope_this->GetClass()->GetImTable();
+      mirror::ObjectArray<mirror::ArtMethod>* imt_table = (*this_object)->GetClass()->GetImTable();
       mirror::ArtMethod* imt_method = imt_table->Get(imt_index);
       if (!imt_method->IsImtConflictMethod()) {
         return imt_method;
       } else {
         mirror::ArtMethod* interface_method =
-            handle_scope_this->GetClass()->FindVirtualMethodForInterface(resolved_method);
+            (*this_object)->GetClass()->FindVirtualMethodForInterface(resolved_method);
         if (UNLIKELY(interface_method == nullptr)) {
           ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(resolved_method,
-                                                                     handle_scope_this.Get(), referrer);
+                                                                     *this_object, *referrer);
           return nullptr;  // Failure.
-        } else {
-          return interface_method;
         }
+        return interface_method;
       }
     }
     default:
@@ -486,8 +490,8 @@
 #define EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL(_type, _access_check)                 \
   template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                       \
   mirror::ArtMethod* FindMethodFromCode<_type, _access_check>(uint32_t method_idx,         \
-                                                              mirror::Object* this_object, \
-                                                              mirror::ArtMethod* referrer, \
+                                                              mirror::Object** this_object, \
+                                                              mirror::ArtMethod** referrer, \
                                                               Thread* self)
 #define EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(_type) \
     EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL(_type, false);   \
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
index 3f02ec7..f2e2bf7 100644
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
+++ b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
@@ -25,6 +25,7 @@
 
 namespace art {
 
+// TODO: Make the MethodHelper here be compaction safe.
 extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, MethodHelper& mh,
                                                    const DexFile::CodeItem* code_item,
                                                    ShadowFrame* shadow_frame, JValue* result) {
@@ -43,6 +44,8 @@
       }
       self->PopShadowFrame();
       CHECK(h_class->IsInitializing());
+      // Reload from shadow frame in case the method moved, this is faster than adding a handle.
+      method = shadow_frame->GetMethod();
     }
   }
   uint16_t arg_offset = (code_item == NULL) ? 0 : code_item->registers_size_ - code_item->ins_size_;
diff --git a/runtime/entrypoints/portable/portable_invoke_entrypoints.cc b/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
index d34b097..3a898e8 100644
--- a/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
@@ -23,17 +23,20 @@
 
 template<InvokeType type, bool access_check>
 mirror::ArtMethod* FindMethodHelper(uint32_t method_idx, mirror::Object* this_object,
-                                    mirror::ArtMethod* caller_method, Thread* thread) {
+                                    mirror::ArtMethod* caller_method, Thread* self) {
   mirror::ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method,
                                              access_check, type);
   if (UNLIKELY(method == NULL)) {
-    method = FindMethodFromCode<type, access_check>(method_idx, this_object, caller_method, thread);
+    // Note: This can cause thread suspension.
+    self->AssertThreadSuspensionIsAllowable();
+    method = FindMethodFromCode<type, access_check>(method_idx, &this_object, &caller_method,
+                                                    self);
     if (UNLIKELY(method == NULL)) {
-      CHECK(thread->IsExceptionPending());
+      CHECK(self->IsExceptionPending());
       return 0;  // failure
     }
   }
-  DCHECK(!thread->IsExceptionPending());
+  DCHECK(!self->IsExceptionPending());
   const void* code = method->GetEntryPointFromPortableCompiledCode();
 
   // When we return, the caller will branch to this address, so it had better not be 0!
diff --git a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
index 17c3222..3756f47 100644
--- a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
@@ -317,11 +317,11 @@
 // Lazily resolve a method for portable. Called by stub code.
 extern "C" const void* artPortableResolutionTrampoline(mirror::ArtMethod* called,
                                                        mirror::Object* receiver,
-                                                       Thread* thread,
+                                                       Thread* self,
                                                        mirror::ArtMethod** called_addr)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   uint32_t dex_pc;
-  mirror::ArtMethod* caller = thread->GetCurrentMethod(&dex_pc);
+  mirror::ArtMethod* caller = self->GetCurrentMethod(&dex_pc);
 
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
   InvokeType invoke_type;
@@ -379,7 +379,7 @@
         is_range = true;
     }
     uint32_t dex_method_idx = (is_range) ? instr->VRegB_3rc() : instr->VRegB_35c();
-    called = linker->ResolveMethod(dex_method_idx, caller, invoke_type);
+    called = linker->ResolveMethod(Thread::Current(), dex_method_idx, &caller, invoke_type);
     // Incompatible class change should have been handled in resolve method.
     CHECK(!called->CheckIncompatibleClassChange(invoke_type));
     // Refine called method based on receiver.
@@ -395,9 +395,9 @@
     CHECK(!called->CheckIncompatibleClassChange(invoke_type));
   }
   const void* code = nullptr;
-  if (LIKELY(!thread->IsExceptionPending())) {
+  if (LIKELY(!self->IsExceptionPending())) {
     // Ensure that the called method's class is initialized.
-    StackHandleScope<1> hs(Thread::Current());
+    StackHandleScope<1> hs(self);
     Handle<mirror::Class> called_class(hs.NewHandle(called->GetDeclaringClass()));
     linker->EnsureInitialized(called_class, true, true);
     if (LIKELY(called_class->IsInitialized())) {
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index ee276c1..554bff4 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -755,11 +755,12 @@
   self->EndAssertNoThreadSuspension(old_cause);
   bool virtual_or_interface = invoke_type == kVirtual || invoke_type == kInterface;
   // Resolve method filling in dex cache.
-  if (called->IsRuntimeMethod()) {
+  if (UNLIKELY(called->IsRuntimeMethod())) {
     StackHandleScope<1> hs(self);
-    Handle<mirror::Object> handle_scope_receiver(hs.NewHandle(virtual_or_interface ? receiver : nullptr));
-    called = linker->ResolveMethod(dex_method_idx, caller, invoke_type);
-    receiver = handle_scope_receiver.Get();
+    mirror::Object* dummy = nullptr;
+    HandleWrapper<mirror::Object> h_receiver(
+        hs.NewHandleWrapper(virtual_or_interface ? &receiver : &dummy));
+    called = linker->ResolveMethod(self, dex_method_idx, &caller, invoke_type);
   }
   const void* code = NULL;
   if (LIKELY(!self->IsExceptionPending())) {
@@ -1313,7 +1314,7 @@
                       &cur_stack_arg_, &cur_gpr_reg_, &cur_fpr_reg_, &code_return_,
                       &alloca_used_size_);
     handle_scope_number_of_references_ = 0;
-    cur_hs_entry_ = reinterpret_cast<StackReference<mirror::Object>*>(GetFirstHandleScopeEntry());
+    cur_hs_entry_ = GetFirstHandleScopeEntry();
 
     // jni environment is always first argument
     sm_.AdvancePointer(self->GetJniEnv());
@@ -1327,7 +1328,13 @@
 
   void FinalizeHandleScope(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  jobject GetFirstHandleScopeEntry() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  StackReference<mirror::Object>* GetFirstHandleScopeEntry()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return handle_scope_->GetHandle(0).GetReference();
+  }
+
+  jobject GetFirstHandleScopeJObject()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return handle_scope_->GetHandle(0).ToJObject();
   }
 
@@ -1502,7 +1509,7 @@
   // Start JNI, save the cookie.
   uint32_t cookie;
   if (called->IsSynchronized()) {
-    cookie = JniMethodStartSynchronized(visitor.GetFirstHandleScopeEntry(), self);
+    cookie = JniMethodStartSynchronized(visitor.GetFirstHandleScopeJObject(), self);
     if (self->IsExceptionPending()) {
       self->PopHandleScope();
       // A negative value denotes an error.
@@ -1530,7 +1537,7 @@
       DCHECK(self->IsExceptionPending());    // There should be an exception pending now.
 
       // End JNI, as the assembly will move to deliver the exception.
-      jobject lock = called->IsSynchronized() ? visitor.GetFirstHandleScopeEntry() : nullptr;
+      jobject lock = called->IsSynchronized() ? visitor.GetFirstHandleScopeJObject() : nullptr;
       if (mh.GetShorty()[0] == 'L') {
         artQuickGenericJniEndJNIRef(self, cookie, nullptr, lock);
       } else {
@@ -1681,7 +1688,8 @@
       ScopedObjectAccessUnchecked soa(self->GetJniEnv());
       RememberForGcArgumentVisitor visitor(sp, type == kStatic, shorty, shorty_len, &soa);
       visitor.VisitArguments();
-      method = FindMethodFromCode<type, access_check>(method_idx, this_object, caller_method, self);
+      method = FindMethodFromCode<type, access_check>(method_idx, &this_object, &caller_method,
+                                                      self);
       visitor.FixupReferences();
     }
 
@@ -1871,7 +1879,7 @@
       ScopedObjectAccessUnchecked soa(self->GetJniEnv());
       RememberForGcArgumentVisitor visitor(sp, false, shorty, shorty_len, &soa);
       visitor.VisitArguments();
-      method = FindMethodFromCode<kInterface, false>(dex_method_idx, this_object, caller_method,
+      method = FindMethodFromCode<kInterface, false>(dex_method_idx, &this_object, &caller_method,
                                                      self);
       visitor.FixupReferences();
     }
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h
index 979970c..bd04473 100644
--- a/runtime/gc/accounting/atomic_stack.h
+++ b/runtime/gc/accounting/atomic_stack.h
@@ -35,8 +35,8 @@
 class AtomicStack {
  public:
   // Capacity is how many elements we can store in the stack.
-  static AtomicStack* Create(const std::string& name, size_t capacity) {
-    std::unique_ptr<AtomicStack> mark_stack(new AtomicStack(name, capacity));
+  static AtomicStack* Create(const std::string& name, size_t growth_limit, size_t capacity) {
+    std::unique_ptr<AtomicStack> mark_stack(new AtomicStack(name, growth_limit, capacity));
     mark_stack->Init();
     return mark_stack.release();
   }
@@ -44,7 +44,7 @@
   ~AtomicStack() {}
 
   void Reset() {
-    DCHECK(mem_map_.get() != NULL);
+    DCHECK(mem_map_.get() != nullptr);
     DCHECK(begin_ != NULL);
     front_index_.StoreRelaxed(0);
     back_index_.StoreRelaxed(0);
@@ -58,20 +58,13 @@
   // Beware: Mixing atomic pushes and atomic pops will cause ABA problem.
 
   // Returns false if we overflowed the stack.
+  bool AtomicPushBackIgnoreGrowthLimit(const T& value) {
+    return AtomicPushBackInternal(value, capacity_);
+  }
+
+  // Returns false if we overflowed the stack.
   bool AtomicPushBack(const T& value) {
-    if (kIsDebugBuild) {
-      debug_is_sorted_ = false;
-    }
-    int32_t index;
-    do {
-      index = back_index_.LoadRelaxed();
-      if (UNLIKELY(static_cast<size_t>(index) >= capacity_)) {
-        // Stack overflow.
-        return false;
-      }
-    } while (!back_index_.CompareExchangeWeakRelaxed(index, index + 1));
-    begin_[index] = value;
-    return true;
+    return AtomicPushBackInternal(value, growth_limit_);
   }
 
   // Atomically bump the back index by the given number of
@@ -85,7 +78,7 @@
     do {
       index = back_index_.LoadRelaxed();
       new_index = index + num_slots;
-      if (UNLIKELY(static_cast<size_t>(new_index) >= capacity_)) {
+      if (UNLIKELY(static_cast<size_t>(new_index) >= growth_limit_)) {
         // Stack overflow.
         return false;
       }
@@ -115,7 +108,7 @@
       debug_is_sorted_ = false;
     }
     int32_t index = back_index_.LoadRelaxed();
-    DCHECK_LT(static_cast<size_t>(index), capacity_);
+    DCHECK_LT(static_cast<size_t>(index), growth_limit_);
     back_index_.StoreRelaxed(index + 1);
     begin_[index] = value;
   }
@@ -165,6 +158,7 @@
   // Will clear the stack.
   void Resize(size_t new_capacity) {
     capacity_ = new_capacity;
+    growth_limit_ = new_capacity;
     Init();
   }
 
@@ -189,15 +183,33 @@
   }
 
  private:
-  AtomicStack(const std::string& name, const size_t capacity)
+  AtomicStack(const std::string& name, size_t growth_limit, size_t capacity)
       : name_(name),
         back_index_(0),
         front_index_(0),
-        begin_(NULL),
+        begin_(nullptr),
+        growth_limit_(growth_limit),
         capacity_(capacity),
         debug_is_sorted_(true) {
   }
 
+  // Returns false if we overflowed the stack.
+  bool AtomicPushBackInternal(const T& value, size_t limit) ALWAYS_INLINE {
+    if (kIsDebugBuild) {
+      debug_is_sorted_ = false;
+    }
+    int32_t index;
+    do {
+      index = back_index_.LoadRelaxed();
+      if (UNLIKELY(static_cast<size_t>(index) >= limit)) {
+        // Stack overflow.
+        return false;
+      }
+    } while (!back_index_.CompareExchangeWeakRelaxed(index, index + 1));
+    begin_[index] = value;
+    return true;
+  }
+
   // Size in number of elements.
   void Init() {
     std::string error_msg;
@@ -213,22 +225,18 @@
 
   // Name of the mark stack.
   std::string name_;
-
   // Memory mapping of the atomic stack.
   std::unique_ptr<MemMap> mem_map_;
-
   // Back index (index after the last element pushed).
   AtomicInteger back_index_;
-
   // Front index, used for implementing PopFront.
   AtomicInteger front_index_;
-
   // Base of the atomic stack.
   T* begin_;
-
+  // Current maximum which we can push back to, must be <= capacity_.
+  size_t growth_limit_;
   // Maximum number of elements.
   size_t capacity_;
-
   // Whether or not the stack is sorted, only updated in debug mode to avoid performance overhead.
   bool debug_is_sorted_;
 
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 03b72b6..58ba61b 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -137,33 +137,11 @@
 
 inline void Heap::PushOnAllocationStack(Thread* self, mirror::Object** obj) {
   if (kUseThreadLocalAllocationStack) {
-    bool success = self->PushOnThreadLocalAllocationStack(*obj);
-    if (UNLIKELY(!success)) {
-      // Slow path. Allocate a new thread-local allocation stack.
-      mirror::Object** start_address;
-      mirror::Object** end_address;
-      while (!allocation_stack_->AtomicBumpBack(kThreadLocalAllocationStackSize,
-                                                &start_address, &end_address)) {
-        // TODO: Add handle VerifyObject.
-        StackHandleScope<1> hs(self);
-        HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
-        CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
-      }
-      self->SetThreadLocalAllocationStack(start_address, end_address);
-      // Retry on the new thread-local allocation stack.
-      success = self->PushOnThreadLocalAllocationStack(*obj);
-      // Must succeed.
-      CHECK(success);
+    if (UNLIKELY(!self->PushOnThreadLocalAllocationStack(*obj))) {
+      PushOnThreadLocalAllocationStackWithInternalGC(self, obj);
     }
-  } else {
-    // This is safe to do since the GC will never free objects which are neither in the allocation
-    // stack or the live bitmap.
-    while (!allocation_stack_->AtomicPushBack(*obj)) {
-      // TODO: Add handle VerifyObject.
-      StackHandleScope<1> hs(self);
-      HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
-      CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
-    }
+  } else if (UNLIKELY(!allocation_stack_->AtomicPushBack(*obj))) {
+    PushOnAllocationStackWithInternalGC(self, obj);
   }
 }
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index fdc4367..a6093ca 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -84,9 +84,14 @@
 static constexpr double kStickyGcThroughputAdjustment = 1.0;
 // Whether or not we use the free list large object space.
 static constexpr bool kUseFreeListSpaceForLOS = false;
-// Whtehr or not we compact the zygote in PreZygoteFork.
+// Whether or not we compact the zygote in PreZygoteFork.
 static constexpr bool kCompactZygote = kMovingCollector;
 static constexpr size_t kNonMovingSpaceCapacity = 64 * MB;
+// How many reserve entries are at the end of the allocation stack, these are only needed if the
+// allocation stack overflows.
+static constexpr size_t kAllocationStackReserveSize = 1024;
+// Default mark stack size in bytes.
+static const size_t kDefaultMarkStackSize = 64 * KB;
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, double foreground_heap_growth_multiplier, size_t capacity,
@@ -295,13 +300,13 @@
   // TODO: Count objects in the image space here.
   num_bytes_allocated_.StoreRelaxed(0);
 
-  // Default mark stack size in bytes.
-  static const size_t default_mark_stack_size = 64 * KB;
-  mark_stack_.reset(accounting::ObjectStack::Create("mark stack", default_mark_stack_size));
-  allocation_stack_.reset(accounting::ObjectStack::Create("allocation stack",
-                                                          max_allocation_stack_size_));
-  live_stack_.reset(accounting::ObjectStack::Create("live stack",
-                                                    max_allocation_stack_size_));
+  mark_stack_.reset(accounting::ObjectStack::Create("mark stack", kDefaultMarkStackSize,
+                                                    kDefaultMarkStackSize));
+  const size_t alloc_stack_capacity = max_allocation_stack_size_ + kAllocationStackReserveSize;
+  allocation_stack_.reset(accounting::ObjectStack::Create(
+      "allocation stack", max_allocation_stack_size_, alloc_stack_capacity));
+  live_stack_.reset(accounting::ObjectStack::Create(
+      "live stack", max_allocation_stack_size_, alloc_stack_capacity));
 
   // It's still too early to take a lock because there are no threads yet, but we can create locks
   // now. We don't create it earlier to make it clear that you can't use locks during heap
@@ -2035,6 +2040,43 @@
   const bool verify_referent_;
 };
 
+void Heap::PushOnAllocationStackWithInternalGC(Thread* self, mirror::Object** obj) {
+  // Slow path, the allocation stack push back must have already failed.
+  DCHECK(!allocation_stack_->AtomicPushBack(*obj));
+  do {
+    // TODO: Add handle VerifyObject.
+    StackHandleScope<1> hs(self);
+    HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
+    // Push our object into the reserve region of the allocaiton stack. This is only required due
+    // to heap verification requiring that roots are live (either in the live bitmap or in the
+    // allocation stack).
+    CHECK(allocation_stack_->AtomicPushBackIgnoreGrowthLimit(*obj));
+    CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
+  } while (!allocation_stack_->AtomicPushBack(*obj));
+}
+
+void Heap::PushOnThreadLocalAllocationStackWithInternalGC(Thread* self, mirror::Object** obj) {
+  // Slow path, the allocation stack push back must have already failed.
+  DCHECK(!self->PushOnThreadLocalAllocationStack(*obj));
+  mirror::Object** start_address;
+  mirror::Object** end_address;
+  while (!allocation_stack_->AtomicBumpBack(kThreadLocalAllocationStackSize, &start_address,
+                                            &end_address)) {
+    // TODO: Add handle VerifyObject.
+    StackHandleScope<1> hs(self);
+    HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
+    // Push our object into the reserve region of the allocaiton stack. This is only required due
+    // to heap verification requiring that roots are live (either in the live bitmap or in the
+    // allocation stack).
+    CHECK(allocation_stack_->AtomicPushBackIgnoreGrowthLimit(*obj));
+    // Push into the reserve allocation stack.
+    CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
+  }
+  self->SetThreadLocalAllocationStack(start_address, end_address);
+  // Retry on the new thread-local allocation stack.
+  CHECK(self->PushOnThreadLocalAllocationStack(*obj));  // Must succeed.
+}
+
 // Must do this with mutators suspended since we are directly accessing the allocation stacks.
 size_t Heap::VerifyHeapReferences(bool verify_referents) {
   Thread* self = Thread::Current();
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 887b17e..e11671b 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -698,6 +698,10 @@
   // Push an object onto the allocation stack.
   void PushOnAllocationStack(Thread* self, mirror::Object** obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void PushOnAllocationStackWithInternalGC(Thread* self, mirror::Object** obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void PushOnThreadLocalAllocationStackWithInternalGC(Thread* thread, mirror::Object** obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // What kind of concurrency behavior is the runtime after? Currently true for concurrent mark
   // sweep GC, false for other GC types.
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index e63cc39..54a63f0 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -141,8 +141,10 @@
 size_t LargeObjectMapSpace::Free(Thread* self, mirror::Object* ptr) {
   MutexLock mu(self, lock_);
   MemMaps::iterator found = mem_maps_.find(ptr);
-  CHECK(found != mem_maps_.end()) << "Attempted to free large object" << ptr
-      << "which was not live";
+  if (UNLIKELY(found == mem_maps_.end())) {
+    Runtime::Current()->GetHeap()->DumpSpaces(LOG(ERROR));
+    LOG(FATAL) << "Attempted to free large object " << ptr << " which was not live";
+  }
   DCHECK_GE(num_bytes_allocated_, found->second->Size());
   size_t allocation_size = found->second->Size();
   num_bytes_allocated_ -= allocation_size;
diff --git a/runtime/handle.h b/runtime/handle.h
index 3127864..b70f651 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -53,29 +53,48 @@
     reference_->Assign(reference);
     return old;
   }
-  jobject ToJObject() const ALWAYS_INLINE {
+  jobject ToJObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    if (UNLIKELY(reference_->AsMirrorPtr() == nullptr)) {
+      // Special case so that we work with NullHandles.
+      return nullptr;
+    }
     return reinterpret_cast<jobject>(reference_);
   }
 
- private:
+ protected:
   StackReference<T>* reference_;
 
   template<typename S>
   explicit Handle(StackReference<S>* reference)
       : reference_(reinterpret_cast<StackReference<T>*>(reference)) {
   }
-
   template<typename S>
   explicit Handle(const Handle<S>& handle)
       : reference_(reinterpret_cast<StackReference<T>*>(handle.reference_)) {
   }
 
+  StackReference<T>* GetReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    return reference_;
+  }
+
+ private:
+  friend class BuildGenericJniFrameVisitor;
   template<class S> friend class Handle;
   friend class HandleScope;
   template<class S> friend class HandleWrapper;
   template<size_t kNumReferences> friend class StackHandleScope;
 };
 
+template<class T>
+class NullHandle : public Handle<T> {
+ public:
+  NullHandle() : Handle<T>(&null_ref_) {
+  }
+
+ private:
+  StackReference<T> null_ref_;
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_HANDLE_H_
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 478c74c..f77a0f6 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -524,16 +524,17 @@
   ArtMethod* method = shadow_frame->GetMethod();
   // Ensure static methods are initialized.
   if (method->IsStatic()) {
-    StackHandleScope<1> hs(self);
-    Handle<Class> declaringClass(hs.NewHandle(method->GetDeclaringClass()));
-    if (UNLIKELY(!declaringClass->IsInitializing())) {
-      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(declaringClass, true,
-                                                                            true))) {
-        DCHECK(Thread::Current()->IsExceptionPending());
+    mirror::Class* declaring_class = method->GetDeclaringClass();
+    if (UNLIKELY(!declaring_class->IsInitializing())) {
+      StackHandleScope<1> hs(self);
+      HandleWrapper<Class> h_declaring_class(hs.NewHandleWrapper(&declaring_class));
+      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(
+          h_declaring_class, true, true))) {
+        DCHECK(self->IsExceptionPending());
         self->PopShadowFrame();
         return;
       }
-      CHECK(declaringClass->IsInitializing());
+      CHECK(h_declaring_class->IsInitializing());
     }
   }
 
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 418aff5..63ae6fd 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -296,11 +296,9 @@
     // other variants that take more arguments should also be added.
     std::string descriptor(DotToDescriptor(shadow_frame->GetVRegReference(arg_offset)->AsString()->ToModifiedUtf8().c_str()));
 
-    StackHandleScope<1> hs(self);
     // shadow_frame.GetMethod()->GetDeclaringClass()->GetClassLoader();
-    auto class_loader = hs.NewHandle<ClassLoader>(nullptr);
-    Class* found = Runtime::Current()->GetClassLinker()->FindClass(self, descriptor.c_str(),
-                                                                   class_loader);
+    Class* found = Runtime::Current()->GetClassLinker()->FindClass(
+        self, descriptor.c_str(), NullHandle<mirror::ClassLoader>());
     CHECK(found != NULL) << "Class.forName failed in un-started runtime for class: "
         << PrettyDescriptor(descriptor);
     result->SetL(found);
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index b42af11..cfc90a6 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -29,6 +29,7 @@
 #include "dex_instruction.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "gc/accounting/card_table-inl.h"
+#include "handle_scope-inl.h"
 #include "nth_caller_visitor.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method.h"
@@ -112,9 +113,10 @@
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
   const uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
   Object* receiver = (type == kStatic) ? nullptr : shadow_frame.GetVRegReference(vregC);
-  ArtMethod* const method = FindMethodFromCode<type, do_access_check>(method_idx, receiver,
-                                                                      shadow_frame.GetMethod(),
-                                                                      self);
+  mirror::ArtMethod* sf_method = shadow_frame.GetMethod();
+  ArtMethod* const method = FindMethodFromCode<type, do_access_check>(
+      method_idx, &receiver, &sf_method, self);
+  // The shadow frame should already be pushed, so we don't need to update it.
   if (UNLIKELY(method == nullptr)) {
     CHECK(self->IsExceptionPending());
     result->SetJ(0);
@@ -348,6 +350,10 @@
     case Primitive::kPrimNot: {
       Object* reg = shadow_frame.GetVRegReference(vregA);
       if (do_assignability_check && reg != nullptr) {
+        // FieldHelper::GetType can resolve classes, use a handle wrapper which will restore the
+        // object in the destructor.
+        StackHandleScope<1> hs(self);
+        HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(&obj));
         Class* field_class = FieldHelper(f).GetType();
         if (!reg->VerifierInstanceOf(field_class)) {
           // This should never happen.
@@ -372,7 +378,8 @@
 // Handles iput-quick, iput-wide-quick and iput-object-quick instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<Primitive::Type field_type, bool transaction_active>
-static SOMETIMES_INLINE_KEYWORD bool DoIPutQuick(const ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
+static SOMETIMES_INLINE_KEYWORD bool DoIPutQuick(const ShadowFrame& shadow_frame,
+                                                 const Instruction* inst, uint16_t inst_data) {
   Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
   if (UNLIKELY(obj == nullptr)) {
     // We lost the reference to the field index so we cannot get a more
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 6f3317d..b51e1d5 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -3138,7 +3138,7 @@
 }
 
 bool JavaVMExt::LoadNativeLibrary(const std::string& path,
-                                  const Handle<mirror::ClassLoader>& class_loader,
+                                  Handle<mirror::ClassLoader> class_loader,
                                   std::string* detail) {
   detail->clear();
 
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 37195eb..7e76e11 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -67,7 +67,7 @@
    * Returns 'true' on success. On failure, sets 'detail' to a
    * human-readable description of the error.
    */
-  bool LoadNativeLibrary(const std::string& path, const Handle<mirror::ClassLoader>& class_loader,
+  bool LoadNativeLibrary(const std::string& path, Handle<mirror::ClassLoader> class_loader,
                          std::string* detail)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc
index 552652c..1076643 100644
--- a/runtime/mirror/array.cc
+++ b/runtime/mirror/array.cc
@@ -42,8 +42,8 @@
 // Recursively create an array with multiple dimensions.  Elements may be
 // Objects or primitive types.
 static Array* RecursiveCreateMultiArray(Thread* self,
-                                        const Handle<Class>& array_class, int current_dimension,
-                                        const Handle<mirror::IntArray>& dimensions)
+                                        Handle<Class> array_class, int current_dimension,
+                                        Handle<mirror::IntArray> dimensions)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   int32_t array_length = dimensions->Get(current_dimension);
   StackHandleScope<1> hs(self);
@@ -73,8 +73,8 @@
   return new_array.Get();
 }
 
-Array* Array::CreateMultiArray(Thread* self, const Handle<Class>& element_class,
-                               const Handle<IntArray>& dimensions) {
+Array* Array::CreateMultiArray(Thread* self, Handle<Class> element_class,
+                               Handle<IntArray> dimensions) {
   // Verify dimensions.
   //
   // The caller is responsible for verifying that "dimArray" is non-null
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 1b8106e..64e2317 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -38,8 +38,8 @@
                       bool fill_usable = false)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static Array* CreateMultiArray(Thread* self, const Handle<Class>& element_class,
-                                 const Handle<IntArray>& dimensions)
+  static Array* CreateMultiArray(Thread* self, Handle<Class> element_class,
+                                 Handle<IntArray> dimensions)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index af544fd..e2d3f41 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -229,7 +229,7 @@
   return 0;
 }
 
-uint32_t ArtMethod::FindCatchBlock(Handle<Class>& exception_type, uint32_t dex_pc,
+uint32_t ArtMethod::FindCatchBlock(Handle<Class> exception_type, uint32_t dex_pc,
                                    bool* has_no_move_exception, bool* exc_changed) {
   MethodHelper mh(this);
   const DexFile::CodeItem* code_item = mh.GetCodeItem();
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 34fe0bf..2e8253f 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -401,7 +401,7 @@
   // In the process of finding a catch block we might trigger resolution errors. This is flagged
   // by exc_changed, which indicates that a different exception is now stored in the thread and
   // should be reloaded.
-  uint32_t FindCatchBlock(Handle<Class>& exception_type, uint32_t dex_pc,
+  uint32_t FindCatchBlock(Handle<Class> exception_type, uint32_t dex_pc,
                           bool* has_no_move_exception, bool* exc_changed)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc
index d8591cc..b1de2b6 100644
--- a/runtime/mirror/stack_trace_element.cc
+++ b/runtime/mirror/stack_trace_element.cc
@@ -39,10 +39,8 @@
   java_lang_StackTraceElement_ = NULL;
 }
 
-StackTraceElement* StackTraceElement::Alloc(Thread* self,
-                                            Handle<String>& declaring_class,
-                                            Handle<String>& method_name,
-                                            Handle<String>& file_name,
+StackTraceElement* StackTraceElement::Alloc(Thread* self, Handle<String> declaring_class,
+                                            Handle<String> method_name, Handle<String> file_name,
                                             int32_t line_number) {
   StackTraceElement* trace =
       down_cast<StackTraceElement*>(GetStackTraceElement()->AllocObject(self));
@@ -57,8 +55,8 @@
 }
 
 template<bool kTransactionActive>
-void StackTraceElement::Init(Handle<String>& declaring_class, Handle<String>& method_name,
-                             Handle<String>& file_name, int32_t line_number) {
+void StackTraceElement::Init(Handle<String> declaring_class, Handle<String> method_name,
+                             Handle<String> file_name, int32_t line_number) {
   SetFieldObject<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, declaring_class_),
                                      declaring_class.Get());
   SetFieldObject<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, method_name_),
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index 22d9b71..e094e8b 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -46,10 +46,8 @@
     return GetField32(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, line_number_));
   }
 
-  static StackTraceElement* Alloc(Thread* self,
-                                  Handle<String>& declaring_class,
-                                  Handle<String>& method_name,
-                                  Handle<String>& file_name,
+  static StackTraceElement* Alloc(Thread* self, Handle<String> declaring_class,
+                                  Handle<String> method_name, Handle<String> file_name,
                                   int32_t line_number)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -70,8 +68,8 @@
   int32_t line_number_;
 
   template<bool kTransactionActive>
-  void Init(Handle<String>& declaring_class, Handle<String>& method_name,
-            Handle<String>& file_name, int32_t line_number)
+  void Init(Handle<String> declaring_class, Handle<String> method_name, Handle<String> file_name,
+            int32_t line_number)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Class* java_lang_StackTraceElement_;
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index ee719b4..1d79106 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -131,7 +131,7 @@
   return Alloc(self, array);
 }
 
-String* String::Alloc(Thread* self, const Handle<CharArray>& array) {
+String* String::Alloc(Thread* self, Handle<CharArray> array) {
   // Hold reference in case AllocObject causes GC.
   String* string = down_cast<String*>(GetJavaLangString()->AllocObject(self));
   if (LIKELY(string != nullptr)) {
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 169b671..6c3015f 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -137,7 +137,7 @@
   static String* Alloc(Thread* self, int32_t utf16_length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static String* Alloc(Thread* self, const Handle<CharArray>& array)
+  static String* Alloc(Thread* self, Handle<CharArray> array)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetArray(CharArray* new_array) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 3123fd7..58e6dd4 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -637,7 +637,7 @@
   }
 }
 
-void Monitor::InflateThinLocked(Thread* self, Handle<mirror::Object>& obj, LockWord lock_word,
+void Monitor::InflateThinLocked(Thread* self, Handle<mirror::Object> obj, LockWord lock_word,
                                 uint32_t hash_code) {
   DCHECK_EQ(lock_word.GetState(), LockWord::kThinLocked);
   uint32_t owner_thread_id = lock_word.ThinLockOwner();
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 7af2d4c..9e6d255 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -116,7 +116,7 @@
     return monitor_id_;
   }
 
-  static void InflateThinLocked(Thread* self, Handle<mirror::Object>& obj, LockWord lock_word,
+  static void InflateThinLocked(Thread* self, Handle<mirror::Object> obj, LockWord lock_word,
                                 uint32_t hash_code) NO_THREAD_SAFETY_ANALYSIS;
 
   static bool Deflate(Thread* self, mirror::Object* obj)
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index d9c9b59..d55b545 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -221,7 +221,7 @@
 }
 
 // Based on ClassLinker::ResolveString.
-static void PreloadDexCachesResolveString(Handle<mirror::DexCache>& dex_cache, uint32_t string_idx,
+static void PreloadDexCachesResolveString(Handle<mirror::DexCache> dex_cache, uint32_t string_idx,
                                           StringTable& strings)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::String* string = dex_cache->GetResolvedString(string_idx);
@@ -267,8 +267,7 @@
 }
 
 // Based on ClassLinker::ResolveField.
-static void PreloadDexCachesResolveField(Handle<mirror::DexCache>& dex_cache,
-                                         uint32_t field_idx,
+static void PreloadDexCachesResolveField(Handle<mirror::DexCache> dex_cache, uint32_t field_idx,
                                          bool is_static)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = dex_cache->GetResolvedField(field_idx);
@@ -296,8 +295,7 @@
 }
 
 // Based on ClassLinker::ResolveMethod.
-static void PreloadDexCachesResolveMethod(Handle<mirror::DexCache>& dex_cache,
-                                          uint32_t method_idx,
+static void PreloadDexCachesResolveMethod(Handle<mirror::DexCache> dex_cache, uint32_t method_idx,
                                           InvokeType invoke_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtMethod* method = dex_cache->GetResolvedMethod(method_idx);
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 743fd1b..b5c07aa 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -121,8 +121,8 @@
 }
 
 MethodVerifier::FailureKind MethodVerifier::VerifyClass(const DexFile* dex_file,
-                                                        Handle<mirror::DexCache>& dex_cache,
-                                                        Handle<mirror::ClassLoader>& class_loader,
+                                                        Handle<mirror::DexCache> dex_cache,
+                                                        Handle<mirror::ClassLoader> class_loader,
                                                         const DexFile::ClassDef* class_def,
                                                         bool allow_soft_failures,
                                                         std::string* error) {
@@ -151,7 +151,8 @@
     previous_direct_method_idx = method_idx;
     InvokeType type = it.GetMethodInvokeType(*class_def);
     mirror::ArtMethod* method =
-        linker->ResolveMethod(*dex_file, method_idx, dex_cache, class_loader, NULL, type);
+        linker->ResolveMethod(*dex_file, method_idx, dex_cache, class_loader,
+                              NullHandle<mirror::ArtMethod>(), type);
     if (method == NULL) {
       DCHECK(Thread::Current()->IsExceptionPending());
       // We couldn't resolve the method, but continue regardless.
@@ -194,7 +195,8 @@
     previous_virtual_method_idx = method_idx;
     InvokeType type = it.GetMethodInvokeType(*class_def);
     mirror::ArtMethod* method =
-        linker->ResolveMethod(*dex_file, method_idx, dex_cache, class_loader, NULL, type);
+        linker->ResolveMethod(*dex_file, method_idx, dex_cache, class_loader,
+                              NullHandle<mirror::ArtMethod>(), type);
     if (method == NULL) {
       DCHECK(Thread::Current()->IsExceptionPending());
       // We couldn't resolve the method, but continue regardless.
@@ -234,8 +236,8 @@
 
 MethodVerifier::FailureKind MethodVerifier::VerifyMethod(uint32_t method_idx,
                                                          const DexFile* dex_file,
-                                                         Handle<mirror::DexCache>& dex_cache,
-                                                         Handle<mirror::ClassLoader>& class_loader,
+                                                         Handle<mirror::DexCache> dex_cache,
+                                                         Handle<mirror::ClassLoader> class_loader,
                                                          const DexFile::ClassDef* class_def,
                                                          const DexFile::CodeItem* code_item,
                                                          mirror::ArtMethod* method,
@@ -281,8 +283,8 @@
 
 void MethodVerifier::VerifyMethodAndDump(std::ostream& os, uint32_t dex_method_idx,
                                          const DexFile* dex_file,
-                                         Handle<mirror::DexCache>& dex_cache,
-                                         Handle<mirror::ClassLoader>& class_loader,
+                                         Handle<mirror::DexCache> dex_cache,
+                                         Handle<mirror::ClassLoader> class_loader,
                                          const DexFile::ClassDef* class_def,
                                          const DexFile::CodeItem* code_item,
                                          mirror::ArtMethod* method,
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 9bb915a..a23e80d 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -142,15 +142,15 @@
   /* Verify a class. Returns "kNoFailure" on success. */
   static FailureKind VerifyClass(mirror::Class* klass, bool allow_soft_failures, std::string* error)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static FailureKind VerifyClass(const DexFile* dex_file, Handle<mirror::DexCache>& dex_cache,
-                                 Handle<mirror::ClassLoader>& class_loader,
+  static FailureKind VerifyClass(const DexFile* dex_file, Handle<mirror::DexCache> dex_cache,
+                                 Handle<mirror::ClassLoader> class_loader,
                                  const DexFile::ClassDef* class_def,
                                  bool allow_soft_failures, std::string* error)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void VerifyMethodAndDump(std::ostream& os, uint32_t method_idx, const DexFile* dex_file,
-                                  Handle<mirror::DexCache>& dex_cache,
-                                  Handle<mirror::ClassLoader>& class_loader,
+                                  Handle<mirror::DexCache> dex_cache,
+                                  Handle<mirror::ClassLoader> class_loader,
                                   const DexFile::ClassDef* class_def,
                                   const DexFile::CodeItem* code_item,
                                   mirror::ArtMethod* method, uint32_t method_access_flags)
@@ -256,8 +256,8 @@
    *      for code flow problems.
    */
   static FailureKind VerifyMethod(uint32_t method_idx, const DexFile* dex_file,
-                                  Handle<mirror::DexCache>& dex_cache,
-                                  Handle<mirror::ClassLoader>& class_loader,
+                                  Handle<mirror::DexCache> dex_cache,
+                                  Handle<mirror::ClassLoader> class_loader,
                                   const DexFile::ClassDef* class_def_idx,
                                   const DexFile::CodeItem* code_item,
                                   mirror::ArtMethod* method, uint32_t method_access_flags,