Merge "Fix the computation of linear ordering."
diff --git a/compiler/Android.mk b/compiler/Android.mk
index eb9ad47..84176a1 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -180,7 +180,8 @@
   driver/compiler_options.h \
   image_writer.h \
   optimizing/locations.h \
-  utils/arm/constants_arm.h
+  utils/arm/constants_arm.h \
+  utils/dex_instruction_utils.h
 
 # $(1): target or host
 # $(2): ndebug or debug
diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc
index d311bc7..3daeb10 100644
--- a/compiler/dex/global_value_numbering.cc
+++ b/compiler/dex/global_value_numbering.cc
@@ -15,7 +15,6 @@
  */
 
 #include "global_value_numbering.h"
-
 #include "local_value_numbering.h"
 
 namespace art {
@@ -31,8 +30,6 @@
       modifications_allowed_(true),
       mode_(mode),
       global_value_map_(std::less<uint64_t>(), allocator->Adapter()),
-      field_index_map_(FieldReferenceComparator(), allocator->Adapter()),
-      field_index_reverse_map_(allocator->Adapter()),
       array_location_map_(ArrayLocationComparator(), allocator->Adapter()),
       array_location_reverse_map_(allocator->Adapter()),
       ref_set_map_(std::less<ValueNameSet>(), allocator->Adapter()),
@@ -111,11 +108,7 @@
          bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN ||
          bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_OBJECT ||
          bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_WIDE) &&
-        (bb->first_mir_insn == bb->last_mir_insn ||
-         (static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpPhi &&
-          (bb->first_mir_insn->next == bb->last_mir_insn ||
-           (static_cast<int>(bb->first_mir_insn->next->dalvikInsn.opcode) == kMirOpPhi &&
-            bb->first_mir_insn->next->next == bb->last_mir_insn))))) {
+        bb->GetFirstNonPhiInsn() == bb->last_mir_insn) {
       merge_type = LocalValueNumbering::kReturnMerge;
     }
     // At least one predecessor must have been processed before this bb.
@@ -145,19 +138,6 @@
   return change;
 }
 
-uint16_t GlobalValueNumbering::GetFieldId(const MirFieldInfo& field_info, uint16_t type) {
-  FieldReference key = { field_info.DeclaringDexFile(), field_info.DeclaringFieldIndex(), type };
-  auto lb = field_index_map_.lower_bound(key);
-  if (lb != field_index_map_.end() && !field_index_map_.key_comp()(key, lb->first)) {
-    return lb->second;
-  }
-  DCHECK_LT(field_index_map_.size(), kNoValue);
-  uint16_t id = field_index_map_.size();
-  auto it = field_index_map_.PutBefore(lb, key, id);
-  field_index_reverse_map_.push_back(&*it);
-  return id;
-}
-
 uint16_t GlobalValueNumbering::GetArrayLocation(uint16_t base, uint16_t index) {
   auto cmp = array_location_map_.key_comp();
   ArrayLocation key = { base, index };
@@ -207,4 +187,20 @@
   return true;
 }
 
+bool GlobalValueNumbering::DivZeroCheckedInAllPredecessors(
+    const ScopedArenaVector<uint16_t>& merge_names) const {
+  // Implicit parameters:
+  //   - *work_lvn: the LVN for which we're checking predecessors.
+  //   - merge_lvns_: the predecessor LVNs.
+  DCHECK_EQ(merge_lvns_.size(), merge_names.size());
+  for (size_t i = 0, size = merge_lvns_.size(); i != size; ++i) {
+    const LocalValueNumbering* pred_lvn = merge_lvns_[i];
+    uint16_t value_name = merge_names[i];
+    if (!pred_lvn->IsValueDivZeroChecked(value_name)) {
+      return false;
+    }
+  }
+  return true;
+}
+
 }  // namespace art
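
The HasNullCheckLastInsn() hunk above replaces a hand-rolled check for "at most two leading Phis
before the return" with a single call to bb->GetFirstNonPhiInsn(). That helper is not part of this
diff; as a rough sketch of the behavior the new condition relies on, assuming the MIR/BasicBlock
definitions from compiler_internals.h (the free-function name below is invented for illustration):

    // Illustrative only -- not the actual art::BasicBlock member.
    // Skip leading kMirOpPhi pseudo-instructions and return the first real
    // instruction, or nullptr if the block contains nothing but Phis.
    MIR* FirstNonPhiInsn(MIR* first_mir_insn) {
      MIR* mir = first_mir_insn;
      while (mir != nullptr && static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi) {
        mir = mir->next;
      }
      return mir;
    }

Comparing that result against bb->last_mir_insn covers blocks with any number of leading Phis,
whereas the removed expression handled at most two.
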
diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h
index 72d1112..d72144a 100644
--- a/compiler/dex/global_value_numbering.h
+++ b/compiler/dex/global_value_numbering.h
@@ -39,6 +39,12 @@
         cu->mir_graph->GetMaxNestedLoops() > kMaxAllowedNestedLoops;
   }
 
+  // The instance and static field id maps are held by the MIRGraph to avoid multiple
+  // recalculations when doing LVN.
+  template <typename Container>  // Container of MirIFieldLoweringInfo or MirSFieldLoweringInfo.
+  static uint16_t* PrepareGvnFieldIds(ScopedArenaAllocator* allocator,
+                                      const Container& field_infos);
+
   GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator, Mode mode);
   ~GlobalValueNumbering();
 
@@ -114,34 +120,24 @@
     return (it != global_value_map_.end() && it->second == value);
   }
 
-  // FieldReference represents a unique resolved field.
-  struct FieldReference {
-    const DexFile* dex_file;
-    uint16_t field_idx;
-    uint16_t type;  // See comments for LocalValueNumbering::kFieldTypeCount.
-  };
+  // Get an instance field id.
+  uint16_t GetIFieldId(MIR* mir) {
+    return GetMirGraph()->GetGvnIFieldId(mir);
+  }
 
-  struct FieldReferenceComparator {
-    bool operator()(const FieldReference& lhs, const FieldReference& rhs) const {
-      if (lhs.field_idx != rhs.field_idx) {
-        return lhs.field_idx < rhs.field_idx;
-      }
-      // If the field_idx and dex_file match, the type must also match.
-      DCHECK(lhs.dex_file != rhs.dex_file || lhs.type == rhs.type);
-      return lhs.dex_file < rhs.dex_file;
-    }
-  };
+  // Get a static field id.
+  uint16_t GetSFieldId(MIR* mir) {
+    return GetMirGraph()->GetGvnSFieldId(mir);
+  }
 
-  // Maps field key to field id for resolved fields.
-  typedef ScopedArenaSafeMap<FieldReference, uint32_t, FieldReferenceComparator> FieldIndexMap;
+  // Get an instance field type based on field id.
+  uint16_t GetIFieldType(uint16_t field_id) {
+    return static_cast<uint16_t>(GetMirGraph()->GetIFieldLoweringInfo(field_id).MemAccessType());
+  }
 
-  // Get a field id.
-  uint16_t GetFieldId(const MirFieldInfo& field_info, uint16_t type);
-
-  // Get a field type based on field id.
-  uint16_t GetFieldType(uint16_t field_id) {
-    DCHECK_LT(field_id, field_index_reverse_map_.size());
-    return field_index_reverse_map_[field_id]->first.type;
+  // Get a static field type based on field id.
+  uint16_t GetSFieldType(uint16_t field_id) {
+    return static_cast<uint16_t>(GetMirGraph()->GetSFieldLoweringInfo(field_id).MemAccessType());
   }
 
   struct ArrayLocation {
@@ -199,6 +195,8 @@
 
   bool NullCheckedInAllPredecessors(const ScopedArenaVector<uint16_t>& merge_names) const;
 
+  bool DivZeroCheckedInAllPredecessors(const ScopedArenaVector<uint16_t>& merge_names) const;
+
   CompilationUnit* GetCompilationUnit() const {
     return cu_;
   }
@@ -239,8 +237,6 @@
   Mode mode_;
 
   ValueMap global_value_map_;
-  FieldIndexMap field_index_map_;
-  ScopedArenaVector<const FieldIndexMap::value_type*> field_index_reverse_map_;
   ArrayLocationMap array_location_map_;
   ScopedArenaVector<const ArrayLocationMap::value_type*> array_location_reverse_map_;
   RefSetIdMap ref_set_map_;
@@ -268,6 +264,32 @@
   return last_value_;
 }
 
+template <typename Container>  // Container of MirIFieldLoweringInfo or MirSFieldLoweringInfo.
+uint16_t* GlobalValueNumbering::PrepareGvnFieldIds(ScopedArenaAllocator* allocator,
+                                                   const Container& field_infos) {
+  size_t size = field_infos.size();
+  uint16_t* field_ids = reinterpret_cast<uint16_t*>(allocator->Alloc(size * sizeof(uint16_t),
+                                                                     kArenaAllocMisc));
+  for (size_t i = 0u; i != size; ++i) {
+    size_t idx = i;
+    const MirFieldInfo& cur_info = field_infos[i];
+    if (cur_info.IsResolved()) {
+      for (size_t j = 0; j != i; ++j) {
+        const MirFieldInfo& prev_info = field_infos[j];
+        if (prev_info.IsResolved() &&
+            prev_info.DeclaringDexFile() == cur_info.DeclaringDexFile() &&
+            prev_info.DeclaringFieldIndex() == cur_info.DeclaringFieldIndex()) {
+          DCHECK_EQ(cur_info.MemAccessType(), prev_info.MemAccessType());
+          idx = j;
+          break;
+        }
+      }
+    }
+    field_ids[i] = idx;
+  }
+  return field_ids;
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_
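
PrepareGvnFieldIds() replaces the old per-GVN FieldIndexMap: instead of interning
(dex file, field index, type) triples on the fly, the MIRGraph precomputes, for every entry of the
lowering-info table, the index of the first entry that refers to the same resolved field. A
hypothetical input (values invented for illustration) makes the mapping concrete:

    field_infos[0]: resolved, dex file A, declaring field index 5
    field_infos[1]: resolved, dex file A, declaring field index 7
    field_infos[2]: resolved, dex file A, declaring field index 5   (same field as [0])
    field_infos[3]: unresolved

    PrepareGvnFieldIds(...)  ->  field_ids[] = { 0, 1, 0, 3 }

Two MIRs that access the same unique field therefore get the same GVN field id from
GetGvnIFieldId()/GetGvnSFieldId(), which is the property the removed GetFieldId() used to provide,
and GetIFieldType()/GetSFieldType() now read the access type straight from the lowering info
instead of a reverse map.
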
diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc
index 35d5b99..7e3b4d8 100644
--- a/compiler/dex/global_value_numbering_test.cc
+++ b/compiler/dex/global_value_numbering_test.cc
@@ -17,6 +17,7 @@
 #include "compiler_internals.h"
 #include "dataflow_iterator.h"
 #include "dataflow_iterator-inl.h"
+#include "dex/mir_field_info.h"
 #include "global_value_numbering.h"
 #include "local_value_numbering.h"
 #include "gtest/gtest.h"
@@ -32,6 +33,7 @@
     uintptr_t declaring_dex_file;
     uint16_t declaring_field_idx;
     bool is_volatile;
+    DexMemAccessType type;
   };
 
   struct SFieldDef {
@@ -39,6 +41,7 @@
     uintptr_t declaring_dex_file;
     uint16_t declaring_field_idx;
     bool is_volatile;
+    DexMemAccessType type;
   };
 
   struct BBDef {
@@ -131,18 +134,19 @@
     { bb, opcode, 0u, 0u, 2, { src, src + 1 }, 2, { reg, reg + 1 } }
 #define DEF_PHI2(bb, reg, src1, src2) \
     { bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } }
+#define DEF_DIV_REM(bb, opcode, result, dividend, divisor) \
+    { bb, opcode, 0u, 0u, 2, { dividend, divisor }, 1, { result } }
 
   void DoPrepareIFields(const IFieldDef* defs, size_t count) {
     cu_.mir_graph->ifield_lowering_infos_.clear();
     cu_.mir_graph->ifield_lowering_infos_.reserve(count);
     for (size_t i = 0u; i != count; ++i) {
       const IFieldDef* def = &defs[i];
-      MirIFieldLoweringInfo field_info(def->field_idx);
+      MirIFieldLoweringInfo field_info(def->field_idx, def->type);
       if (def->declaring_dex_file != 0u) {
         field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
         field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ = 0u |  // Without kFlagIsStatic.
-            (def->is_volatile ? MirIFieldLoweringInfo::kFlagIsVolatile : 0u);
+        field_info.flags_ &= ~(def->is_volatile ? 0u : MirSFieldLoweringInfo::kFlagIsVolatile);
       }
       cu_.mir_graph->ifield_lowering_infos_.push_back(field_info);
     }
@@ -158,15 +162,14 @@
     cu_.mir_graph->sfield_lowering_infos_.reserve(count);
     for (size_t i = 0u; i != count; ++i) {
       const SFieldDef* def = &defs[i];
-      MirSFieldLoweringInfo field_info(def->field_idx);
+      MirSFieldLoweringInfo field_info(def->field_idx, def->type);
       // Mark even unresolved fields as initialized.
-      field_info.flags_ = MirSFieldLoweringInfo::kFlagIsStatic |
-          MirSFieldLoweringInfo::kFlagClassIsInitialized;
+      field_info.flags_ |= MirSFieldLoweringInfo::kFlagClassIsInitialized;
       // NOTE: MirSFieldLoweringInfo::kFlagClassIsInDexCache isn't used by GVN.
       if (def->declaring_dex_file != 0u) {
         field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
         field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ |= (def->is_volatile ? MirSFieldLoweringInfo::kFlagIsVolatile : 0u);
+        field_info.flags_ &= ~(def->is_volatile ? 0u : MirSFieldLoweringInfo::kFlagIsVolatile);
       }
       cu_.mir_graph->sfield_lowering_infos_.push_back(field_info);
     }
@@ -238,12 +241,16 @@
       mir->dalvikInsn.opcode = def->opcode;
       mir->dalvikInsn.vB = static_cast<int32_t>(def->value);
       mir->dalvikInsn.vB_wide = def->value;
-      if (def->opcode >= Instruction::IGET && def->opcode <= Instruction::IPUT_SHORT) {
+      if (IsInstructionIGetOrIPut(def->opcode)) {
         ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.size());
         mir->meta.ifield_lowering_info = def->field_info;
-      } else if (def->opcode >= Instruction::SGET && def->opcode <= Instruction::SPUT_SHORT) {
+        ASSERT_EQ(cu_.mir_graph->ifield_lowering_infos_[def->field_info].MemAccessType(),
+                  IGetOrIPutMemAccessType(def->opcode));
+      } else if (IsInstructionSGetOrSPut(def->opcode)) {
         ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.size());
         mir->meta.sfield_lowering_info = def->field_info;
+        ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_info].MemAccessType(),
+                  SGetOrSPutMemAccessType(def->opcode));
       } else if (def->opcode == static_cast<Instruction::Code>(kMirOpPhi)) {
         mir->meta.phi_incoming = static_cast<BasicBlockId*>(
             allocator_->Alloc(def->num_uses * sizeof(BasicBlockId), kArenaAllocDFInfo));
@@ -288,6 +295,10 @@
     cu_.mir_graph->ComputeDominators();
     cu_.mir_graph->ComputeTopologicalSortOrder();
     cu_.mir_graph->SSATransformationEnd();
+    cu_.mir_graph->temp_.gvn.ifield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds(
+        allocator_.get(), cu_.mir_graph->ifield_lowering_infos_);
+    cu_.mir_graph->temp_.gvn.sfield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds(
+        allocator_.get(), cu_.mir_graph->sfield_lowering_infos_);
     ASSERT_TRUE(gvn_ == nullptr);
     gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(),
                                                            GlobalValueNumbering::kModeGvn));
@@ -498,18 +509,18 @@
 
 TEST_F(GlobalValueNumberingTestDiamond, NonAliasingIFields) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
-      { 3u, 1u, 3u, false },  // Int.
-      { 4u, 1u, 4u, false },  // Short.
-      { 5u, 1u, 5u, false },  // Char.
-      { 6u, 0u, 0u, false },  // Unresolved, Short.
-      { 7u, 1u, 7u, false },  // Int.
-      { 8u, 0u, 0u, false },  // Unresolved, Int.
-      { 9u, 1u, 9u, false },  // Int.
-      { 10u, 1u, 10u, false },  // Int.
-      { 11u, 1u, 11u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
+      { 3u, 1u, 3u, false, kDexMemAccessWord },
+      { 4u, 1u, 4u, false, kDexMemAccessShort },
+      { 5u, 1u, 5u, false, kDexMemAccessChar },
+      { 6u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
+      { 7u, 1u, 7u, false, kDexMemAccessWord },
+      { 8u, 0u, 0u, false, kDexMemAccessWord },    // Unresolved.
+      { 9u, 1u, 9u, false, kDexMemAccessWord },
+      { 10u, 1u, 10u, false, kDexMemAccessWord },
+      { 11u, 1u, 11u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -604,15 +615,15 @@
 
 TEST_F(GlobalValueNumberingTestDiamond, AliasingIFieldsSingleObject) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
-      { 3u, 1u, 3u, false },  // Int.
-      { 4u, 1u, 4u, false },  // Short.
-      { 5u, 1u, 5u, false },  // Char.
-      { 6u, 0u, 0u, false },  // Unresolved, Short.
-      { 7u, 1u, 7u, false },  // Int.
-      { 8u, 1u, 8u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
+      { 3u, 1u, 3u, false, kDexMemAccessWord },
+      { 4u, 1u, 4u, false, kDexMemAccessShort },
+      { 5u, 1u, 5u, false, kDexMemAccessChar },
+      { 6u, 0u, 0u, false, kDexMemAccessShort },  // Unresolved.
+      { 7u, 1u, 7u, false, kDexMemAccessWord },
+      { 8u, 1u, 8u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -671,15 +682,15 @@
 
 TEST_F(GlobalValueNumberingTestDiamond, AliasingIFieldsTwoObjects) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
-      { 3u, 1u, 3u, false },  // Int.
-      { 4u, 1u, 4u, false },  // Short.
-      { 5u, 1u, 5u, false },  // Char.
-      { 6u, 0u, 0u, false },  // Unresolved, Short.
-      { 7u, 1u, 7u, false },  // Int.
-      { 8u, 1u, 8u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
+      { 3u, 1u, 3u, false, kDexMemAccessWord },
+      { 4u, 1u, 4u, false, kDexMemAccessShort },
+      { 5u, 1u, 5u, false, kDexMemAccessChar },
+      { 6u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
+      { 7u, 1u, 7u, false, kDexMemAccessWord },
+      { 8u, 1u, 8u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -740,15 +751,15 @@
 
 TEST_F(GlobalValueNumberingTestDiamond, SFields) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
-      { 3u, 1u, 3u, false },  // Int.
-      { 4u, 1u, 4u, false },  // Short.
-      { 5u, 1u, 5u, false },  // Char.
-      { 6u, 0u, 0u, false },  // Unresolved, Short.
-      { 7u, 1u, 7u, false },  // Int.
-      { 8u, 1u, 8u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
+      { 3u, 1u, 3u, false, kDexMemAccessWord },
+      { 4u, 1u, 4u, false, kDexMemAccessShort },
+      { 5u, 1u, 5u, false, kDexMemAccessChar },
+      { 6u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
+      { 7u, 1u, 7u, false, kDexMemAccessWord },
+      { 8u, 1u, 8u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -1078,18 +1089,18 @@
 
 TEST_F(GlobalValueNumberingTestLoop, NonAliasingIFields) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
-      { 3u, 1u, 3u, false },  // Int.
-      { 4u, 1u, 4u, false },  // Int.
-      { 5u, 1u, 5u, false },  // Short.
-      { 6u, 1u, 6u, false },  // Char.
-      { 7u, 0u, 0u, false },  // Unresolved, Short.
-      { 8u, 1u, 8u, false },  // Int.
-      { 9u, 0u, 0u, false },  // Unresolved, Int.
-      { 10u, 1u, 10u, false },  // Int.
-      { 11u, 1u, 11u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
+      { 3u, 1u, 3u, false, kDexMemAccessWord },
+      { 4u, 1u, 4u, false, kDexMemAccessWord },
+      { 5u, 1u, 5u, false, kDexMemAccessShort },
+      { 6u, 1u, 6u, false, kDexMemAccessChar },
+      { 7u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
+      { 8u, 1u, 8u, false, kDexMemAccessWord },
+      { 9u, 0u, 0u, false, kDexMemAccessWord },    // Unresolved.
+      { 10u, 1u, 10u, false, kDexMemAccessWord },
+      { 11u, 1u, 11u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -1201,14 +1212,14 @@
 
 TEST_F(GlobalValueNumberingTestLoop, AliasingIFieldsSingleObject) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
-      { 3u, 1u, 3u, false },  // Int.
-      { 4u, 1u, 4u, false },  // Int.
-      { 5u, 1u, 5u, false },  // Short.
-      { 6u, 1u, 6u, false },  // Char.
-      { 7u, 0u, 0u, false },  // Unresolved, Short.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
+      { 3u, 1u, 3u, false, kDexMemAccessWord },
+      { 4u, 1u, 4u, false, kDexMemAccessWord },
+      { 5u, 1u, 5u, false, kDexMemAccessShort },
+      { 6u, 1u, 6u, false, kDexMemAccessChar },
+      { 7u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -1272,14 +1283,14 @@
 
 TEST_F(GlobalValueNumberingTestLoop, AliasingIFieldsTwoObjects) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
-      { 3u, 1u, 3u, false },  // Short.
-      { 4u, 1u, 4u, false },  // Char.
-      { 5u, 0u, 0u, false },  // Unresolved, Short.
-      { 6u, 1u, 6u, false },  // Int.
-      { 7u, 1u, 7u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
+      { 3u, 1u, 3u, false, kDexMemAccessShort },
+      { 4u, 1u, 4u, false, kDexMemAccessChar },
+      { 5u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
+      { 6u, 1u, 6u, false, kDexMemAccessWord },
+      { 7u, 1u, 7u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -1341,7 +1352,7 @@
 
 TEST_F(GlobalValueNumberingTestLoop, IFieldToBaseDependency) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // For the IGET that loads sreg 3u using base 2u, the following IPUT creates a dependency
@@ -1366,9 +1377,9 @@
 
 TEST_F(GlobalValueNumberingTestLoop, SFields) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -1562,8 +1573,8 @@
 
 TEST_F(GlobalValueNumberingTestCatch, IFields) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },
-      { 1u, 1u, 1u, false },
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 200u),
@@ -1608,8 +1619,8 @@
 
 TEST_F(GlobalValueNumberingTestCatch, SFields) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false },
-      { 1u, 1u, 1u, false },
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_SGET(3, Instruction::SGET, 0u, 0u),
@@ -1731,8 +1742,8 @@
 
 TEST_F(GlobalValueNumberingTest, NullCheckIFields) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Object.
-      { 1u, 1u, 1u, false },  // Object.
+      { 0u, 1u, 0u, false, kDexMemAccessObject },  // Object.
+      { 1u, 1u, 1u, false, kDexMemAccessObject },  // Object.
   };
   static const BBDef bbs[] = {
       DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
@@ -1780,8 +1791,8 @@
 
 TEST_F(GlobalValueNumberingTest, NullCheckSFields) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false },  // Object.
-      { 1u, 1u, 1u, false },  // Object.
+      { 0u, 1u, 0u, false, kDexMemAccessObject },
+      { 1u, 1u, 1u, false, kDexMemAccessObject },
   };
   static const BBDef bbs[] = {
       DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
@@ -1907,12 +1918,12 @@
 
 TEST_F(GlobalValueNumberingTestDiamond, MergeSameValueInDifferentMemoryLocations) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 100u),
@@ -1977,7 +1988,7 @@
   // LVN's aliasing_array_value_map_'s load_value_map for BBs #9, #4, #5, #7 because of the
   // DFS ordering of LVN evaluation.
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Object.
+      { 0u, 1u, 0u, false, kDexMemAccessObject },
   };
   static const BBDef bbs[] = {
       DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
@@ -2015,7 +2026,7 @@
 
 TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, IFieldAndPhi) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessObject },
   };
   static const MIRDef mirs[] = {
       DEF_MOVE(3, Instruction::MOVE_OBJECT, 0u, 100u),
@@ -2052,10 +2063,10 @@
 
 TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, NullCheck) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessObject },
   };
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessObject },
   };
   static const MIRDef mirs[] = {
       DEF_MOVE(3, Instruction::MOVE_OBJECT, 0u, 100u),
@@ -2143,7 +2154,7 @@
 
 TEST_F(GlobalValueNumberingTestTwoNestedLoops, IFieldAndPhi) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessObject },
   };
   static const MIRDef mirs[] = {
       DEF_MOVE(3, Instruction::MOVE_OBJECT, 0u, 100u),
@@ -2213,4 +2224,45 @@
   PerformGVN();
 }
 
+TEST_F(GlobalValueNumberingTestDiamond, DivZeroCheckDiamond) {
+  static const MIRDef mirs[] = {
+      DEF_DIV_REM(3u, Instruction::DIV_INT, 1u, 20u, 21u),
+      DEF_DIV_REM(3u, Instruction::DIV_INT, 2u, 24u, 21u),
+      DEF_DIV_REM(3u, Instruction::DIV_INT, 3u, 20u, 23u),
+      DEF_DIV_REM(4u, Instruction::DIV_INT, 4u, 24u, 22u),
+      DEF_DIV_REM(4u, Instruction::DIV_INT, 9u, 24u, 25u),
+      DEF_DIV_REM(5u, Instruction::DIV_INT, 5u, 24u, 21u),
+      DEF_DIV_REM(5u, Instruction::DIV_INT, 10u, 24u, 26u),
+      DEF_PHI2(6u, 27u, 25u, 26u),
+      DEF_DIV_REM(6u, Instruction::DIV_INT, 12u, 20u, 27u),
+      DEF_DIV_REM(6u, Instruction::DIV_INT, 6u, 24u, 21u),
+      DEF_DIV_REM(6u, Instruction::DIV_INT, 7u, 20u, 23u),
+      DEF_DIV_REM(6u, Instruction::DIV_INT, 8u, 20u, 22u),
+  };
+
+  static const bool expected_ignore_div_zero_check[] = {
+      false,  // New divisor seen.
+      true,   // Eliminated since the divisor is the same as in the first division.
+      false,  // New divisor seen.
+      false,  // New divisor seen.
+      false,  // New divisor seen.
+      true,   // Eliminated in dominating block.
+      false,  // New divisor seen.
+      false,  // Phi node.
+      true,   // Eliminated on both sides of diamond and merged via phi.
+      true,   // Eliminated in dominating block.
+      true,   // Eliminated in dominating block.
+      false,  // Only eliminated on one path of diamond.
+  };
+
+  PrepareMIRs(mirs);
+  PerformGVN();
+  PerformGVNCodeModifications();
+  ASSERT_EQ(arraysize(expected_ignore_div_zero_check), mir_count_);
+  for (size_t i = 0u; i != mir_count_; ++i) {
+    int expected = expected_ignore_div_zero_check[i] ? MIR_IGNORE_DIV_ZERO_CHECK : 0u;
+    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
+  }
+}
+
 }  // namespace art
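
The DivZeroCheckDiamond expectations line up with the diamond CFG built by the
GlobalValueNumberingTestDiamond fixture (block numbering inferred from the MIR definitions and the
comments; the fixture's BBDef table is outside this excerpt):

        [3]          divisors 21 and 23 first checked here; block 3 dominates 4, 5 and 6
       /   \
     [4]   [5]       22 and 25 checked only in 4; the re-check of 21 in 5 is eliminated;
       \   /         26 checked only in 5
        [6]          27 = Phi(25, 26) was checked on both incoming paths, so its check goes;
                     21 and 23 were checked in the dominating block, so theirs go too;
                     22 was checked on only one path, so its check must stay
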
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index c1ce2ac..c502b0c 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -56,7 +56,7 @@
  public:
   static uint16_t StartMemoryVersion(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
                                      uint16_t field_id) {
-    uint16_t type = gvn->GetFieldType(field_id);
+    uint16_t type = gvn->GetIFieldType(field_id);
     return gvn->LookupValue(kAliasingIFieldStartVersionOp, field_id,
                             lvn->global_memory_version_, lvn->unresolved_ifield_version_[type]);
   }
@@ -75,7 +75,7 @@
   static uint16_t LookupMergeValue(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
                                    uint16_t field_id, uint16_t base) {
     // If the base/field_id is non-aliasing in lvn, use the non-aliasing value.
-    uint16_t type = gvn->GetFieldType(field_id);
+    uint16_t type = gvn->GetIFieldType(field_id);
     if (lvn->IsNonAliasingIField(base, field_id, type)) {
       uint16_t loc = gvn->LookupValue(kNonAliasingIFieldLocOp, base, field_id, type);
       auto lb = lvn->non_aliasing_ifield_value_map_.find(loc);
@@ -89,7 +89,7 @@
 
   static bool HasNewBaseVersion(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
                                 uint16_t field_id) {
-    uint16_t type = gvn->GetFieldType(field_id);
+    uint16_t type = gvn->GetIFieldType(field_id);
     return lvn->unresolved_ifield_version_[type] == lvn->merge_new_memory_version_ ||
         lvn->global_memory_version_ == lvn->merge_new_memory_version_;
   }
@@ -339,6 +339,7 @@
       escaped_array_clobber_set_(EscapedArrayClobberKeyComparator(), allocator->Adapter()),
       range_checked_(RangeCheckKeyComparator() , allocator->Adapter()),
       null_checked_(std::less<uint16_t>(), allocator->Adapter()),
+      div_zero_checked_(std::less<uint16_t>(), allocator->Adapter()),
       merge_names_(allocator->Adapter()),
       merge_map_(std::less<ScopedArenaVector<BasicBlockId>>(), allocator->Adapter()),
       merge_new_memory_version_(kNoValue) {
@@ -362,7 +363,8 @@
       escaped_ifield_clobber_set_ == other.escaped_ifield_clobber_set_ &&
       escaped_array_clobber_set_ == other.escaped_array_clobber_set_ &&
       range_checked_ == other.range_checked_ &&
-      null_checked_ == other.null_checked_;
+      null_checked_ == other.null_checked_ &&
+      div_zero_checked_ == other.div_zero_checked_;
 }
 
 void LocalValueNumbering::MergeOne(const LocalValueNumbering& other, MergeType merge_type) {
@@ -379,6 +381,7 @@
   non_aliasing_refs_ = other.non_aliasing_refs_;
   range_checked_ = other.range_checked_;
   null_checked_ = other.null_checked_;
+  div_zero_checked_ = other.div_zero_checked_;
 
   const BasicBlock* pred_bb = gvn_->GetBasicBlock(other.Id());
   if (GlobalValueNumbering::HasNullCheckLastInsn(pred_bb, Id())) {
@@ -699,6 +702,28 @@
   }
 }
 
+void LocalValueNumbering::MergeDivZeroChecked() {
+  DCHECK_GE(gvn_->merge_lvns_.size(), 2u);
+
+  // Find the LVN with the least entries in the set.
+  const LocalValueNumbering* least_entries_lvn = gvn_->merge_lvns_[0];
+  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+    if (lvn->div_zero_checked_.size() < least_entries_lvn->div_zero_checked_.size()) {
+      least_entries_lvn = lvn;
+    }
+  }
+
+  // For each div-zero-checked value name, check whether it's checked in all the LVNs.
+  for (const auto& value_name : least_entries_lvn->div_zero_checked_) {
+    // Merge div_zero_checked_ for this value name.
+    merge_names_.clear();
+    merge_names_.resize(gvn_->merge_lvns_.size(), value_name);
+    if (gvn_->DivZeroCheckedInAllPredecessors(merge_names_)) {
+      div_zero_checked_.insert(div_zero_checked_.end(), value_name);
+    }
+  }
+}
+
 void LocalValueNumbering::MergeSFieldValues(const SFieldToValueMap::value_type& entry,
                                             SFieldToValueMap::iterator hint) {
   uint16_t field_id = entry.first;
@@ -711,7 +736,7 @@
     if (it != lvn->sfield_value_map_.end()) {
       value_name = it->second;
     } else {
-      uint16_t type = gvn_->GetFieldType(field_id);
+      uint16_t type = gvn_->GetSFieldType(field_id);
       value_name = gvn_->LookupValue(kResolvedSFieldOp, field_id,
                                      lvn->unresolved_sfield_version_[type],
                                      lvn->global_memory_version_);
@@ -931,6 +956,9 @@
   // Merge null_checked_. We may later insert more, such as merged object field values.
   MergeNullChecked();
 
+  // Now merge the div_zero_checked_.
+  MergeDivZeroChecked();
+
   if (merge_type == kCatchMerge) {
     // Memory is clobbered. New memory version already created, don't merge aliasing locations.
     return;
@@ -1054,6 +1082,20 @@
   }
 }
 
+void LocalValueNumbering::HandleDivZeroCheck(MIR* mir, uint16_t reg) {
+  auto lb = div_zero_checked_.lower_bound(reg);
+  if (lb != div_zero_checked_.end() && *lb == reg) {
+    if (LIKELY(gvn_->CanModify())) {
+      if (gvn_->GetCompilationUnit()->verbose) {
+        LOG(INFO) << "Removing div zero check for 0x" << std::hex << mir->offset;
+      }
+      mir->optimization_flags |= MIR_IGNORE_DIV_ZERO_CHECK;
+    }
+  } else {
+    div_zero_checked_.insert(lb, reg);
+  }
+}
+
 void LocalValueNumbering::HandlePutObject(MIR* mir) {
   // If we're storing a non-aliasing reference, stop tracking it as non-aliasing now.
   uint16_t base = GetOperandValue(mir->ssa_rep->uses[0]);
@@ -1139,6 +1181,9 @@
       if (!wide && gvn_->NullCheckedInAllPredecessors(merge_names_)) {
         null_checked_.insert(value_name);
       }
+      if (gvn_->DivZeroCheckedInAllPredecessors(merge_names_)) {
+        div_zero_checked_.insert(value_name);
+      }
     }
   }
   if (wide) {
@@ -1150,12 +1195,11 @@
 }
 
 uint16_t LocalValueNumbering::HandleAGet(MIR* mir, uint16_t opcode) {
-  // uint16_t type = opcode - Instruction::AGET;
   uint16_t array = GetOperandValue(mir->ssa_rep->uses[0]);
   HandleNullCheck(mir, array);
   uint16_t index = GetOperandValue(mir->ssa_rep->uses[1]);
   HandleRangeCheck(mir, array, index);
-  uint16_t type = opcode - Instruction::AGET;
+  uint16_t type = AGetMemAccessType(static_cast<Instruction::Code>(opcode));
   // Establish value number for loaded register.
   uint16_t res;
   if (IsNonAliasingArray(array, type)) {
@@ -1182,7 +1226,7 @@
   uint16_t index = GetOperandValue(mir->ssa_rep->uses[index_idx]);
   HandleRangeCheck(mir, array, index);
 
-  uint16_t type = opcode - Instruction::APUT;
+  uint16_t type = APutMemAccessType(static_cast<Instruction::Code>(opcode));
   uint16_t value = (opcode == Instruction::APUT_WIDE)
                    ? GetOperandValueWide(mir->ssa_rep->uses[0])
                    : GetOperandValue(mir->ssa_rep->uses[0]);
@@ -1224,8 +1268,8 @@
     // Use result s_reg - will be unique.
     res = gvn_->LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue);
   } else {
-    uint16_t type = opcode - Instruction::IGET;
-    uint16_t field_id = gvn_->GetFieldId(field_info, type);
+    uint16_t type = IGetMemAccessType(static_cast<Instruction::Code>(opcode));
+    uint16_t field_id = gvn_->GetIFieldId(mir);
     if (IsNonAliasingIField(base, field_id, type)) {
       uint16_t loc = gvn_->LookupValue(kNonAliasingIFieldLocOp, base, field_id, type);
       auto lb = non_aliasing_ifield_value_map_.lower_bound(loc);
@@ -1249,10 +1293,10 @@
 }
 
 void LocalValueNumbering::HandleIPut(MIR* mir, uint16_t opcode) {
-  uint16_t type = opcode - Instruction::IPUT;
   int base_reg = (opcode == Instruction::IPUT_WIDE) ? 2 : 1;
   uint16_t base = GetOperandValue(mir->ssa_rep->uses[base_reg]);
   HandleNullCheck(mir, base);
+  uint16_t type = IPutMemAccessType(static_cast<Instruction::Code>(opcode));
   const MirFieldInfo& field_info = gvn_->GetMirGraph()->GetIFieldLoweringInfo(mir);
   if (!field_info.IsResolved()) {
     // Unresolved fields always alias with everything of the same type.
@@ -1272,7 +1316,7 @@
     // Aliasing fields of the same type may have been overwritten.
     auto it = aliasing_ifield_value_map_.begin(), end = aliasing_ifield_value_map_.end();
     while (it != end) {
-      if (gvn_->GetFieldType(it->first) != type) {
+      if (gvn_->GetIFieldType(it->first) != type) {
         ++it;
       } else {
         it = aliasing_ifield_value_map_.erase(it);
@@ -1282,7 +1326,7 @@
     // Nothing to do, resolved volatile fields always get a new memory version anyway and
     // can't alias with resolved non-volatile fields.
   } else {
-    uint16_t field_id = gvn_->GetFieldId(field_info, type);
+    uint16_t field_id = gvn_->GetIFieldId(mir);
     uint16_t value = (opcode == Instruction::IPUT_WIDE)
                      ? GetOperandValueWide(mir->ssa_rep->uses[0])
                      : GetOperandValue(mir->ssa_rep->uses[0]);
@@ -1333,8 +1377,8 @@
     // Use result s_reg - will be unique.
     res = gvn_->LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue);
   } else {
-    uint16_t type = opcode - Instruction::SGET;
-    uint16_t field_id = gvn_->GetFieldId(field_info, type);
+    uint16_t type = SGetMemAccessType(static_cast<Instruction::Code>(opcode));
+    uint16_t field_id = gvn_->GetSFieldId(mir);
     auto lb = sfield_value_map_.lower_bound(field_id);
     if (lb != sfield_value_map_.end() && lb->first == field_id) {
       res = lb->second;
@@ -1362,7 +1406,7 @@
     // Class initialization can call arbitrary functions, we need to wipe aliasing values.
     HandleInvokeOrClInitOrAcquireOp(mir);
   }
-  uint16_t type = opcode - Instruction::SPUT;
+  uint16_t type = SPutMemAccessType(static_cast<Instruction::Code>(opcode));
   if (!field_info.IsResolved()) {
     // Unresolved fields always alias with everything of the same type.
     // Use mir->offset as modifier; without elaborate inlining, it will be unique.
@@ -1373,7 +1417,7 @@
     // Nothing to do, resolved volatile fields always get a new memory version anyway and
     // can't alias with resolved non-volatile fields.
   } else {
-    uint16_t field_id = gvn_->GetFieldId(field_info, type);
+    uint16_t field_id = gvn_->GetSFieldId(mir);
     uint16_t value = (opcode == Instruction::SPUT_WIDE)
                      ? GetOperandValueWide(mir->ssa_rep->uses[0])
                      : GetOperandValue(mir->ssa_rep->uses[0]);
@@ -1397,7 +1441,7 @@
 void LocalValueNumbering::RemoveSFieldsForType(uint16_t type) {
   // Erase all static fields of this type from the sfield_value_map_.
   for (auto it = sfield_value_map_.begin(), end = sfield_value_map_.end(); it != end; ) {
-    if (gvn_->GetFieldType(it->first) == type) {
+    if (gvn_->GetSFieldType(it->first) == type) {
       it = sfield_value_map_.erase(it);
     } else {
       ++it;
@@ -1696,6 +1740,13 @@
       }
       break;
 
+    case Instruction::DIV_INT:
+    case Instruction::DIV_INT_2ADDR:
+    case Instruction::REM_INT:
+    case Instruction::REM_INT_2ADDR:
+      HandleDivZeroCheck(mir, GetOperandValue(mir->ssa_rep->uses[1]));
+      FALLTHROUGH_INTENDED;
+
     case Instruction::CMPG_FLOAT:
     case Instruction::CMPL_FLOAT:
     case Instruction::ADD_INT:
@@ -1710,10 +1761,6 @@
     case Instruction::XOR_INT_2ADDR:
     case Instruction::SUB_INT:
     case Instruction::SUB_INT_2ADDR:
-    case Instruction::DIV_INT:
-    case Instruction::DIV_INT_2ADDR:
-    case Instruction::REM_INT:
-    case Instruction::REM_INT_2ADDR:
     case Instruction::SHL_INT:
     case Instruction::SHL_INT_2ADDR:
     case Instruction::SHR_INT:
@@ -1728,19 +1775,22 @@
       }
       break;
 
+    case Instruction::DIV_LONG:
+    case Instruction::REM_LONG:
+    case Instruction::DIV_LONG_2ADDR:
+    case Instruction::REM_LONG_2ADDR:
+      HandleDivZeroCheck(mir, GetOperandValueWide(mir->ssa_rep->uses[2]));
+      FALLTHROUGH_INTENDED;
+
     case Instruction::ADD_LONG:
     case Instruction::SUB_LONG:
     case Instruction::MUL_LONG:
-    case Instruction::DIV_LONG:
-    case Instruction::REM_LONG:
     case Instruction::AND_LONG:
     case Instruction::OR_LONG:
     case Instruction::XOR_LONG:
     case Instruction::ADD_LONG_2ADDR:
     case Instruction::SUB_LONG_2ADDR:
     case Instruction::MUL_LONG_2ADDR:
-    case Instruction::DIV_LONG_2ADDR:
-    case Instruction::REM_LONG_2ADDR:
     case Instruction::AND_LONG_2ADDR:
     case Instruction::OR_LONG_2ADDR:
     case Instruction::XOR_LONG_2ADDR:
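
HandleDivZeroCheck() above does a single lower_bound() probe on the sorted div_zero_checked_ set:
the probe answers "already checked?" and doubles as the insertion hint when it is not. A minimal
standalone sketch of that idiom, using std::set instead of the arena-backed ValueNameSet
(CheckOrRecord is an invented name):

    #include <cstdint>
    #include <set>

    // Returns true if value_name was already recorded (the caller may then set
    // MIR_IGNORE_DIV_ZERO_CHECK); otherwise records it via the hinted insert.
    bool CheckOrRecord(std::set<uint16_t>* checked, uint16_t value_name) {
      auto lb = checked->lower_bound(value_name);
      if (lb != checked->end() && *lb == value_name) {
        return true;
      }
      checked->insert(lb, value_name);  // Hint points at the right spot; one lookup overall.
      return false;
    }

The DIV/REM cases in GetValueNumber() call HandleDivZeroCheck() with the divisor's value name
(uses[1] for the int forms, uses[2] — the first half of the wide divisor — for the long forms) and
then fall through to the ordinary binary-op value numbering.
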
diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h
index 979fd5a..8613f97 100644
--- a/compiler/dex/local_value_numbering.h
+++ b/compiler/dex/local_value_numbering.h
@@ -47,6 +47,10 @@
     return null_checked_.find(value_name) != null_checked_.end();
   }
 
+  bool IsValueDivZeroChecked(uint16_t value_name) const {
+    return div_zero_checked_.find(value_name) != div_zero_checked_.end();
+  }
+
   bool IsSregValue(uint16_t s_reg, uint16_t value_name) const {
     auto it = sreg_value_map_.find(s_reg);
     if (it != sreg_value_map_.end()) {
@@ -286,6 +290,7 @@
   bool IsNonAliasingArray(uint16_t reg, uint16_t type) const;
   void HandleNullCheck(MIR* mir, uint16_t reg);
   void HandleRangeCheck(MIR* mir, uint16_t array, uint16_t index);
+  void HandleDivZeroCheck(MIR* mir, uint16_t reg);
   void HandlePutObject(MIR* mir);
   void HandleEscapingRef(uint16_t base);
   void HandleInvokeArgs(const MIR* mir, const LocalValueNumbering* mir_lvn);
@@ -337,6 +342,7 @@
   void MergeNonAliasingIFieldValues(const IFieldLocToValueMap::value_type& entry,
                                     IFieldLocToValueMap::iterator hint);
   void MergeNullChecked();
+  void MergeDivZeroChecked();
 
   template <typename Map, Map LocalValueNumbering::*map_ptr, typename Versions>
   void MergeAliasingValues(const typename Map::value_type& entry, typename Map::iterator hint);
@@ -371,6 +377,7 @@
   // Range check and null check elimination.
   RangeCheckSet range_checked_;
   ValueNameSet null_checked_;
+  ValueNameSet div_zero_checked_;
 
   // Reuse one vector for all merges to avoid leaking too much memory on the ArenaStack.
   ScopedArenaVector<BasicBlockId> merge_names_;
diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc
index 824c323..f4f13f2 100644
--- a/compiler/dex/local_value_numbering_test.cc
+++ b/compiler/dex/local_value_numbering_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include "compiler_internals.h"
+#include "dex/mir_field_info.h"
 #include "global_value_numbering.h"
 #include "local_value_numbering.h"
 #include "gtest/gtest.h"
@@ -28,6 +29,7 @@
     uintptr_t declaring_dex_file;
     uint16_t declaring_field_idx;
     bool is_volatile;
+    DexMemAccessType type;
   };
 
   struct SFieldDef {
@@ -35,6 +37,7 @@
     uintptr_t declaring_dex_file;
     uint16_t declaring_field_idx;
     bool is_volatile;
+    DexMemAccessType type;
   };
 
   struct MIRDef {
@@ -84,18 +87,21 @@
     { opcode, 0u, 0u, 1, { reg }, 0, { } }
 #define DEF_UNIQUE_REF(opcode, reg) \
     { opcode, 0u, 0u, 0, { }, 1, { reg } }  // CONST_CLASS, CONST_STRING, NEW_ARRAY, ...
+#define DEF_DIV_REM(opcode, result, dividend, divisor) \
+    { opcode, 0u, 0u, 2, { dividend, divisor }, 1, { result } }
+#define DEF_DIV_REM_WIDE(opcode, result, dividend, divisor) \
+    { opcode, 0u, 0u, 4, { dividend, dividend + 1, divisor, divisor + 1 }, 2, { result, result + 1 } }
 
   void DoPrepareIFields(const IFieldDef* defs, size_t count) {
     cu_.mir_graph->ifield_lowering_infos_.clear();
     cu_.mir_graph->ifield_lowering_infos_.reserve(count);
     for (size_t i = 0u; i != count; ++i) {
       const IFieldDef* def = &defs[i];
-      MirIFieldLoweringInfo field_info(def->field_idx);
+      MirIFieldLoweringInfo field_info(def->field_idx, def->type);
       if (def->declaring_dex_file != 0u) {
         field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
         field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ = 0u |  // Without kFlagIsStatic.
-            (def->is_volatile ? MirIFieldLoweringInfo::kFlagIsVolatile : 0u);
+        field_info.flags_ &= ~(def->is_volatile ? 0u : MirSFieldLoweringInfo::kFlagIsVolatile);
       }
       cu_.mir_graph->ifield_lowering_infos_.push_back(field_info);
     }
@@ -111,15 +117,14 @@
     cu_.mir_graph->sfield_lowering_infos_.reserve(count);
     for (size_t i = 0u; i != count; ++i) {
       const SFieldDef* def = &defs[i];
-      MirSFieldLoweringInfo field_info(def->field_idx);
+      MirSFieldLoweringInfo field_info(def->field_idx, def->type);
       // Mark even unresolved fields as initialized.
-      field_info.flags_ = MirSFieldLoweringInfo::kFlagIsStatic |
-          MirSFieldLoweringInfo::kFlagClassIsInitialized;
+      field_info.flags_ |= MirSFieldLoweringInfo::kFlagClassIsInitialized;
       // NOTE: MirSFieldLoweringInfo::kFlagClassIsInDexCache isn't used by LVN.
       if (def->declaring_dex_file != 0u) {
         field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
         field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ |= (def->is_volatile ? MirSFieldLoweringInfo::kFlagIsVolatile : 0u);
+        field_info.flags_ &= ~(def->is_volatile ? 0u : MirSFieldLoweringInfo::kFlagIsVolatile);
       }
       cu_.mir_graph->sfield_lowering_infos_.push_back(field_info);
     }
@@ -140,12 +145,16 @@
       mir->dalvikInsn.opcode = def->opcode;
       mir->dalvikInsn.vB = static_cast<int32_t>(def->value);
       mir->dalvikInsn.vB_wide = def->value;
-      if (def->opcode >= Instruction::IGET && def->opcode <= Instruction::IPUT_SHORT) {
+      if (IsInstructionIGetOrIPut(def->opcode)) {
         ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.size());
         mir->meta.ifield_lowering_info = def->field_info;
-      } else if (def->opcode >= Instruction::SGET && def->opcode <= Instruction::SPUT_SHORT) {
+        ASSERT_EQ(cu_.mir_graph->ifield_lowering_infos_[def->field_info].MemAccessType(),
+                  IGetOrIPutMemAccessType(def->opcode));
+      } else if (IsInstructionSGetOrSPut(def->opcode)) {
         ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.size());
         mir->meta.sfield_lowering_info = def->field_info;
+        ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_info].MemAccessType(),
+                  SGetOrSPutMemAccessType(def->opcode));
       }
       mir->ssa_rep = &ssa_reps_[i];
       mir->ssa_rep->num_uses = def->num_uses;
@@ -177,6 +186,13 @@
   }
 
   void PerformLVN() {
+    cu_.mir_graph->temp_.gvn.ifield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds(
+        allocator_.get(), cu_.mir_graph->ifield_lowering_infos_);
+    cu_.mir_graph->temp_.gvn.sfield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds(
+        allocator_.get(), cu_.mir_graph->sfield_lowering_infos_);
+    gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(),
+                                                           GlobalValueNumbering::kModeLvn));
+    lvn_.reset(new (allocator_.get()) LocalValueNumbering(gvn_.get(), 0u, allocator_.get()));
     value_names_.resize(mir_count_);
     for (size_t i = 0; i != mir_count_; ++i) {
       value_names_[i] =  lvn_->GetValueNumber(&mirs_[i]);
@@ -196,9 +212,6 @@
         value_names_() {
     cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
     allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
-    gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(),
-                                                           GlobalValueNumbering::kModeLvn));
-    lvn_.reset(new (allocator_.get()) LocalValueNumbering(gvn_.get(), 0u, allocator_.get()));
   }
 
   ArenaPool pool_;
@@ -214,7 +227,7 @@
 
 TEST_F(LocalValueNumberingTest, IGetIGetInvokeIGet) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_IGET(Instruction::IGET, 0u, 10u, 0u),
@@ -237,8 +250,8 @@
 
 TEST_F(LocalValueNumberingTest, IGetIPutIGetIGetIGet) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
-      { 2u, 1u, 2u, false },
+      { 1u, 1u, 1u, false, kDexMemAccessObject },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_IGET(Instruction::IGET_OBJECT, 0u, 10u, 0u),
@@ -262,7 +275,7 @@
 
 TEST_F(LocalValueNumberingTest, UniquePreserve1) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 10u),
@@ -284,7 +297,7 @@
 
 TEST_F(LocalValueNumberingTest, UniquePreserve2) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 11u),
@@ -306,7 +319,7 @@
 
 TEST_F(LocalValueNumberingTest, UniquePreserveAndEscape) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 10u),
@@ -331,8 +344,8 @@
 
 TEST_F(LocalValueNumberingTest, Volatile) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
-      { 2u, 1u, 2u, true },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, true, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_IGET(Instruction::IGET, 0u, 10u, 1u),  // Volatile.
@@ -358,9 +371,9 @@
 
 TEST_F(LocalValueNumberingTest, UnresolvedIField) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },  // Resolved field #1.
-      { 2u, 1u, 2u, false },  // Resolved field #2.
-      { 3u, 0u, 0u, false },  // Unresolved field.
+      { 1u, 1u, 1u, false, kDexMemAccessWord },  // Resolved field #1.
+      { 2u, 1u, 2u, false, kDexMemAccessWide },  // Resolved field #2.
+      { 3u, 0u, 0u, false, kDexMemAccessWord },  // Unresolved field.
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 20u),
@@ -407,9 +420,9 @@
 
 TEST_F(LocalValueNumberingTest, UnresolvedSField) {
   static const SFieldDef sfields[] = {
-      { 1u, 1u, 1u, false },  // Resolved field #1.
-      { 2u, 1u, 2u, false },  // Resolved field #2.
-      { 3u, 0u, 0u, false },  // Unresolved field.
+      { 1u, 1u, 1u, false, kDexMemAccessWord },  // Resolved field #1.
+      { 2u, 1u, 2u, false, kDexMemAccessWide },  // Resolved field #2.
+      { 3u, 0u, 0u, false, kDexMemAccessWord },  // Unresolved field.
   };
   static const MIRDef mirs[] = {
       DEF_SGET(Instruction::SGET, 0u, 0u),            // Resolved field #1.
@@ -438,11 +451,11 @@
 
 TEST_F(LocalValueNumberingTest, UninitializedSField) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },  // Resolved field #1.
+      { 1u, 1u, 1u, false, kDexMemAccessWord },  // Resolved field #1.
   };
   static const SFieldDef sfields[] = {
-      { 1u, 1u, 1u, false },  // Resolved field #1.
-      { 2u, 1u, 2u, false },  // Resolved field #2; uninitialized.
+      { 1u, 1u, 1u, false, kDexMemAccessWord },  // Resolved field #1.
+      { 2u, 1u, 2u, false, kDexMemAccessWord },  // Resolved field #2; uninitialized.
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 200u),
@@ -487,11 +500,11 @@
 
 TEST_F(LocalValueNumberingTest, SameValueInDifferentMemoryLocations) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
-      { 2u, 1u, 2u, false },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
   };
   static const SFieldDef sfields[] = {
-      { 3u, 1u, 3u, false },
+      { 3u, 1u, 3u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(Instruction::NEW_ARRAY, 201u),
@@ -551,12 +564,12 @@
 
 TEST_F(LocalValueNumberingTest, EscapingRefs) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },  // Field #1.
-      { 2u, 1u, 2u, false },  // Field #2.
-      { 3u, 1u, 3u, false },  // Reference field for storing escaping refs.
-      { 4u, 1u, 4u, false },  // Wide.
-      { 5u, 0u, 0u, false },  // Unresolved field, int.
-      { 6u, 0u, 0u, false },  // Unresolved field, wide.
+      { 1u, 1u, 1u, false, kDexMemAccessWord },    // Field #1.
+      { 2u, 1u, 2u, false, kDexMemAccessWord },    // Field #2.
+      { 3u, 1u, 3u, false, kDexMemAccessObject },  // For storing escaping refs.
+      { 4u, 1u, 4u, false, kDexMemAccessWide },    // Wide.
+      { 5u, 0u, 0u, false, kDexMemAccessWord },    // Unresolved field, int.
+      { 6u, 0u, 0u, false, kDexMemAccessWide },    // Unresolved field, wide.
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 20u),
@@ -634,11 +647,11 @@
 
 TEST_F(LocalValueNumberingTest, StoringSameValueKeepsMemoryVersion) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
-      { 2u, 1u, 2u, false },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
   };
   static const SFieldDef sfields[] = {
-      { 2u, 1u, 2u, false },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_IGET(Instruction::IGET, 0u, 30u, 0u),
@@ -716,8 +729,8 @@
 
 TEST_F(LocalValueNumberingTest, ClInitOnSget) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false },
-      { 1u, 2u, 1u, false },
+      { 0u, 1u, 0u, false, kDexMemAccessObject },
+      { 1u, 2u, 1u, false, kDexMemAccessObject },
   };
   static const MIRDef mirs[] = {
       DEF_SGET(Instruction::SGET_OBJECT, 0u, 0u),
@@ -735,4 +748,26 @@
   EXPECT_NE(value_names_[0], value_names_[3]);
 }
 
+TEST_F(LocalValueNumberingTest, DivZeroCheck) {
+  static const MIRDef mirs[] = {
+      DEF_DIV_REM(Instruction::DIV_INT, 1u, 10u, 20u),
+      DEF_DIV_REM(Instruction::DIV_INT, 2u, 20u, 20u),
+      DEF_DIV_REM(Instruction::DIV_INT_2ADDR, 3u, 10u, 1u),
+      DEF_DIV_REM(Instruction::REM_INT, 4u, 30u, 20u),
+      DEF_DIV_REM_WIDE(Instruction::REM_LONG, 5u, 12u, 14u),
+      DEF_DIV_REM_WIDE(Instruction::DIV_LONG_2ADDR, 7u, 16u, 14u),
+  };
+
+  static const bool expected_ignore_div_zero_check[] = {
+      false, true, false, true, false, true,
+  };
+
+  PrepareMIRs(mirs);
+  PerformLVN();
+  for (size_t i = 0u; i != mir_count_; ++i) {
+    int expected = expected_ignore_div_zero_check[i] ? MIR_IGNORE_DIV_ZERO_CHECK : 0u;
+    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
+  }
+}
+
 }  // namespace art
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 44f69ba..7b53b14 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -21,6 +21,7 @@
 #include "dataflow_iterator-inl.h"
 #include "dex_instruction.h"
 #include "dex_instruction-inl.h"
+#include "dex/mir_field_info.h"
 #include "dex/verified_method.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
@@ -1204,6 +1205,8 @@
   ScopedArenaAllocator allocator(&cu_->arena_stack);
   uint16_t* field_idxs =
       reinterpret_cast<uint16_t*>(allocator.Alloc(max_refs * sizeof(uint16_t), kArenaAllocMisc));
+  DexMemAccessType* field_types = reinterpret_cast<DexMemAccessType*>(
+      allocator.Alloc(max_refs * sizeof(DexMemAccessType), kArenaAllocMisc));
 
   // Find IGET/IPUT/SGET/SPUT insns, store IGET/IPUT fields at the beginning, SGET/SPUT at the end.
   size_t ifield_pos = 0u;
@@ -1214,38 +1217,41 @@
       continue;
     }
     for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      if (mir->dalvikInsn.opcode >= Instruction::IGET &&
-          mir->dalvikInsn.opcode <= Instruction::SPUT_SHORT) {
-        // Get field index and try to find it among existing indexes. If found, it's usually among
-        // the last few added, so we'll start the search from ifield_pos/sfield_pos. Though this
-        // is a linear search, it actually performs much better than map based approach.
-        if (mir->dalvikInsn.opcode <= Instruction::IPUT_SHORT) {
-          uint16_t field_idx = mir->dalvikInsn.vC;
-          size_t i = ifield_pos;
-          while (i != 0u && field_idxs[i - 1] != field_idx) {
-            --i;
-          }
-          if (i != 0u) {
-            mir->meta.ifield_lowering_info = i - 1;
-          } else {
-            mir->meta.ifield_lowering_info = ifield_pos;
-            field_idxs[ifield_pos++] = field_idx;
-          }
-        } else {
-          uint16_t field_idx = mir->dalvikInsn.vB;
-          size_t i = sfield_pos;
-          while (i != max_refs && field_idxs[i] != field_idx) {
-            ++i;
-          }
-          if (i != max_refs) {
-            mir->meta.sfield_lowering_info = max_refs - i - 1u;
-          } else {
-            mir->meta.sfield_lowering_info = max_refs - sfield_pos;
-            field_idxs[--sfield_pos] = field_idx;
-          }
+      // Get field index and try to find it among existing indexes. If found, it's usually among
+      // the last few added, so we'll start the search from ifield_pos/sfield_pos. Though this
+      // is a linear search, it actually performs much better than a map-based approach.
+      if (IsInstructionIGetOrIPut(mir->dalvikInsn.opcode)) {
+        uint16_t field_idx = mir->dalvikInsn.vC;
+        size_t i = ifield_pos;
+        while (i != 0u && field_idxs[i - 1] != field_idx) {
+          --i;
         }
-        DCHECK_LE(ifield_pos, sfield_pos);
+        if (i != 0u) {
+          mir->meta.ifield_lowering_info = i - 1;
+          DCHECK_EQ(field_types[i - 1], IGetOrIPutMemAccessType(mir->dalvikInsn.opcode));
+        } else {
+          mir->meta.ifield_lowering_info = ifield_pos;
+          field_idxs[ifield_pos] = field_idx;
+          field_types[ifield_pos] = IGetOrIPutMemAccessType(mir->dalvikInsn.opcode);
+          ++ifield_pos;
+        }
+      } else if (IsInstructionSGetOrSPut(mir->dalvikInsn.opcode)) {
+        uint16_t field_idx = mir->dalvikInsn.vB;
+        size_t i = sfield_pos;
+        while (i != max_refs && field_idxs[i] != field_idx) {
+          ++i;
+        }
+        if (i != max_refs) {
+          mir->meta.sfield_lowering_info = max_refs - i - 1u;
+          DCHECK_EQ(field_types[i], SGetOrSPutMemAccessType(mir->dalvikInsn.opcode));
+        } else {
+          mir->meta.sfield_lowering_info = max_refs - sfield_pos;
+          --sfield_pos;
+          field_idxs[sfield_pos] = field_idx;
+          field_types[sfield_pos] = SGetOrSPutMemAccessType(mir->dalvikInsn.opcode);
+        }
       }
+      DCHECK_LE(ifield_pos, sfield_pos);
     }
   }
 
@@ -1254,7 +1260,7 @@
     DCHECK_EQ(ifield_lowering_infos_.size(), 0u);
     ifield_lowering_infos_.reserve(ifield_pos);
     for (size_t pos = 0u; pos != ifield_pos; ++pos) {
-      ifield_lowering_infos_.push_back(MirIFieldLoweringInfo(field_idxs[pos]));
+      ifield_lowering_infos_.push_back(MirIFieldLoweringInfo(field_idxs[pos], field_types[pos]));
     }
     MirIFieldLoweringInfo::Resolve(cu_->compiler_driver, GetCurrentDexCompilationUnit(),
                                    ifield_lowering_infos_.data(), ifield_pos);
@@ -1266,7 +1272,7 @@
     sfield_lowering_infos_.reserve(max_refs - sfield_pos);
     for (size_t pos = max_refs; pos != sfield_pos;) {
       --pos;
-      sfield_lowering_infos_.push_back(MirSFieldLoweringInfo(field_idxs[pos]));
+      sfield_lowering_infos_.push_back(MirSFieldLoweringInfo(field_idxs[pos], field_types[pos]));
     }
     MirSFieldLoweringInfo::Resolve(cu_->compiler_driver, GetCurrentDexCompilationUnit(),
                                    sfield_lowering_infos_.data(), max_refs - sfield_pos);
@@ -1329,19 +1335,10 @@
       continue;
     }
     for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      if (mir->dalvikInsn.opcode >= Instruction::INVOKE_VIRTUAL &&
-          mir->dalvikInsn.opcode <= Instruction::INVOKE_INTERFACE_RANGE &&
-          mir->dalvikInsn.opcode != Instruction::RETURN_VOID_BARRIER) {
+      if (IsInstructionInvoke(mir->dalvikInsn.opcode)) {
         // Decode target method index and invoke type.
-        uint16_t target_method_idx;
-        uint16_t invoke_type_idx;
-        if (mir->dalvikInsn.opcode <= Instruction::INVOKE_INTERFACE) {
-          target_method_idx = mir->dalvikInsn.vB;
-          invoke_type_idx = mir->dalvikInsn.opcode - Instruction::INVOKE_VIRTUAL;
-        } else {
-          target_method_idx = mir->dalvikInsn.vB;
-          invoke_type_idx = mir->dalvikInsn.opcode - Instruction::INVOKE_VIRTUAL_RANGE;
-        }
+        uint16_t target_method_idx = mir->dalvikInsn.vB;
+        DexInvokeType invoke_type_idx = InvokeInstructionType(mir->dalvikInsn.opcode);
 
         // Find devirtualization target.
         // TODO: The devirt map is ordered by the dex pc here. Is there a way to get INVOKEs
diff --git a/compiler/dex/mir_field_info.cc b/compiler/dex/mir_field_info.cc
index 1db3b5b..53afcad 100644
--- a/compiler/dex/mir_field_info.cc
+++ b/compiler/dex/mir_field_info.cc
@@ -35,7 +35,7 @@
     DCHECK(field_infos != nullptr);
     DCHECK_NE(count, 0u);
     for (auto it = field_infos, end = field_infos + count; it != end; ++it) {
-      MirIFieldLoweringInfo unresolved(it->field_idx_);
+      MirIFieldLoweringInfo unresolved(it->field_idx_, it->MemAccessType());
       DCHECK_EQ(memcmp(&unresolved, &*it, sizeof(*it)), 0);
     }
   }
@@ -66,6 +66,7 @@
     std::pair<bool, bool> fast_path = compiler_driver->IsFastInstanceField(
         dex_cache.Get(), referrer_class.Get(), resolved_field, field_idx);
     it->flags_ = 0u |  // Without kFlagIsStatic.
+        (it->flags_ & (kMemAccessTypeMask << kBitMemAccessTypeBegin)) |
         (is_volatile ? kFlagIsVolatile : 0u) |
         (fast_path.first ? kFlagFastGet : 0u) |
         (fast_path.second ? kFlagFastPut : 0u);
@@ -79,7 +80,7 @@
     DCHECK(field_infos != nullptr);
     DCHECK_NE(count, 0u);
     for (auto it = field_infos, end = field_infos + count; it != end; ++it) {
-      MirSFieldLoweringInfo unresolved(it->field_idx_);
+      MirSFieldLoweringInfo unresolved(it->field_idx_, it->MemAccessType());
       // In 64-bit builds, there's padding after storage_index_, don't include it in memcmp.
       size_t size = OFFSETOF_MEMBER(MirSFieldLoweringInfo, storage_index_) +
           sizeof(it->storage_index_);
@@ -114,6 +115,7 @@
     std::pair<bool, bool> fast_path = compiler_driver->IsFastStaticField(
         dex_cache.Get(), referrer_class, resolved_field, field_idx, &it->storage_index_);
     uint16_t flags = kFlagIsStatic |
+        (it->flags_ & (kMemAccessTypeMask << kBitMemAccessTypeBegin)) |
         (is_volatile ? kFlagIsVolatile : 0u) |
         (fast_path.first ? kFlagFastGet : 0u) |
         (fast_path.second ? kFlagFastPut : 0u);
diff --git a/compiler/dex/mir_field_info.h b/compiler/dex/mir_field_info.h
index e97f7a0..ff427f8 100644
--- a/compiler/dex/mir_field_info.h
+++ b/compiler/dex/mir_field_info.h
@@ -20,6 +20,7 @@
 #include "base/macros.h"
 #include "dex_file.h"
 #include "offsets.h"
+#include "utils/dex_instruction_utils.h"
 
 namespace art {
 
@@ -63,18 +64,27 @@
     return (flags_ & kFlagIsVolatile) != 0u;
   }
 
+  DexMemAccessType MemAccessType() const {
+    return static_cast<DexMemAccessType>((flags_ >> kBitMemAccessTypeBegin) & kMemAccessTypeMask);
+  }
+
  protected:
   enum {
     kBitIsStatic = 0,
     kBitIsVolatile,
-    kFieldInfoBitEnd
+    kBitMemAccessTypeBegin,
+    kBitMemAccessTypeEnd = kBitMemAccessTypeBegin + 3,  // 3 bits for raw type.
+    kFieldInfoBitEnd = kBitMemAccessTypeEnd
   };
   static constexpr uint16_t kFlagIsVolatile = 1u << kBitIsVolatile;
   static constexpr uint16_t kFlagIsStatic = 1u << kBitIsStatic;
+  static constexpr uint16_t kMemAccessTypeMask = 7u;
+  static_assert((1u << (kBitMemAccessTypeEnd - kBitMemAccessTypeBegin)) - 1u == kMemAccessTypeMask,
+                "Invalid raw type mask");
 
-  MirFieldInfo(uint16_t field_idx, uint16_t flags)
+  MirFieldInfo(uint16_t field_idx, uint16_t flags, DexMemAccessType type)
       : field_idx_(field_idx),
-        flags_(flags),
+        flags_(flags | static_cast<uint16_t>(type) << kBitMemAccessTypeBegin),
         declaring_field_idx_(0u),
         declaring_class_idx_(0u),
         declaring_dex_file_(nullptr) {
@@ -107,8 +117,8 @@
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   // Construct an unresolved instance field lowering info.
-  explicit MirIFieldLoweringInfo(uint16_t field_idx)
-      : MirFieldInfo(field_idx, kFlagIsVolatile),  // Without kFlagIsStatic.
+  explicit MirIFieldLoweringInfo(uint16_t field_idx, DexMemAccessType type)
+      : MirFieldInfo(field_idx, kFlagIsVolatile, type),  // Without kFlagIsStatic.
         field_offset_(0u) {
   }
 
@@ -155,8 +165,8 @@
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   // Construct an unresolved static field lowering info.
-  explicit MirSFieldLoweringInfo(uint16_t field_idx)
-      : MirFieldInfo(field_idx, kFlagIsVolatile | kFlagIsStatic),
+  explicit MirSFieldLoweringInfo(uint16_t field_idx, DexMemAccessType type)
+      : MirFieldInfo(field_idx, kFlagIsVolatile | kFlagIsStatic, type),
         field_offset_(0u),
         storage_index_(DexFile::kDexNoIndex) {
   }
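
A standalone sketch of the flag packing used by MirFieldInfo above: the 3-bit DexMemAccessType sits in bits [kBitMemAccessTypeBegin, kBitMemAccessTypeEnd) of the 16-bit flags word, next to the static and volatile bits. The enumerator list below is an assumption for illustration; the real enum lives in utils/dex_instruction_utils.h.

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for the enum declared in utils/dex_instruction_utils.h.
enum DexMemAccessType : uint16_t {
  kDexMemAccessWord = 0,
  kDexMemAccessWide,
  kDexMemAccessObject,
  kDexMemAccessBoolean,
  kDexMemAccessByte,
  kDexMemAccessChar,
  kDexMemAccessShort,
};

// Bit layout mirroring MirFieldInfo: bit 0 = static, bit 1 = volatile, bits 2-4 = access type.
constexpr uint16_t kBitIsStatic = 0;
constexpr uint16_t kBitIsVolatile = 1;
constexpr uint16_t kBitMemAccessTypeBegin = 2;
constexpr uint16_t kBitMemAccessTypeEnd = kBitMemAccessTypeBegin + 3;
constexpr uint16_t kMemAccessTypeMask = (1u << (kBitMemAccessTypeEnd - kBitMemAccessTypeBegin)) - 1u;
static_assert(kMemAccessTypeMask == 7u, "3-bit type field");

constexpr uint16_t PackFlags(bool is_static, bool is_volatile, DexMemAccessType type) {
  return static_cast<uint16_t>((is_static ? 1u << kBitIsStatic : 0u) |
                               (is_volatile ? 1u << kBitIsVolatile : 0u) |
                               (static_cast<uint16_t>(type) << kBitMemAccessTypeBegin));
}

constexpr DexMemAccessType UnpackType(uint16_t flags) {
  return static_cast<DexMemAccessType>((flags >> kBitMemAccessTypeBegin) & kMemAccessTypeMask);
}

int main() {
  uint16_t flags = PackFlags(/* is_static= */ true, /* is_volatile= */ false, kDexMemAccessObject);
  assert(UnpackType(flags) == kDexMemAccessObject);
  assert((flags & (1u << kBitIsStatic)) != 0u);
  return 0;
}
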
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index b87ab66..fdf01eb 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -97,11 +97,6 @@
       max_nested_loops_(0u),
       i_dom_list_(NULL),
       temp_scoped_alloc_(),
-      temp_insn_data_(nullptr),
-      temp_bit_vector_size_(0u),
-      temp_bit_vector_(nullptr),
-      temp_bit_matrix_(nullptr),
-      temp_gvn_(),
       block_list_(arena->Adapter(kArenaAllocBBList)),
       try_block_addr_(NULL),
       entry_block_(NULL),
@@ -133,6 +128,7 @@
       sfield_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)),
       method_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)),
       gen_suspend_test_list_(arena->Adapter()) {
+  memset(&temp_, 0, sizeof(temp_));
   use_counts_.reserve(256);
   raw_use_counts_.reserve(256);
   block_list_.reserve(100);
@@ -262,8 +258,6 @@
   DCHECK(insn != orig_block->first_mir_insn);
   DCHECK(insn == bottom_block->first_mir_insn);
   DCHECK_EQ(insn->offset, bottom_block->start_offset);
-  DCHECK(static_cast<int>(insn->dalvikInsn.opcode) == kMirOpCheck ||
-         !MIR::DecodedInstruction::IsPseudoMirOp(insn->dalvikInsn.opcode));
   DCHECK_EQ(dex_pc_to_block_map_[insn->offset], orig_block->id);
   // Scan the "bottom" instructions, remapping them to the
   // newly created "bottom" block.
@@ -919,7 +913,7 @@
                 bb->first_mir_insn ? " | " : " ");
         for (mir = bb->first_mir_insn; mir; mir = mir->next) {
             int opcode = mir->dalvikInsn.opcode;
-            fprintf(file, "    {%04x %s %s %s %s %s %s %s %s\\l}%s\\\n", mir->offset,
+            fprintf(file, "    {%04x %s %s %s %s %s %s %s %s %s\\l}%s\\\n", mir->offset,
                       mir->ssa_rep ? GetDalvikDisassembly(mir) :
                       !MIR::DecodedInstruction::IsPseudoMirOp(opcode) ?
                         Instruction::Name(mir->dalvikInsn.opcode) :
@@ -931,6 +925,7 @@
                       (mir->optimization_flags & MIR_CALLEE) != 0 ? " inlined" : " ",
                       (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) != 0 ? " cl_inited" : " ",
                       (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) != 0 ? " cl_in_cache" : " ",
+                      (mir->optimization_flags & MIR_IGNORE_DIV_ZERO_CHECK) != 0 ? " no_div_check" : " ",
                       mir->next ? " | " : " ");
         }
         fprintf(file, "  }\"];\n\n");
@@ -1173,6 +1168,14 @@
   return true;
 }
 
+MIR* BasicBlock::GetFirstNonPhiInsn() {
+  MIR* mir = first_mir_insn;
+  while (mir != nullptr && static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi) {
+    mir = mir->next;
+  }
+  return mir;
+}
+
 MIR* BasicBlock::GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current) {
   MIR* next_mir = nullptr;
 
@@ -1213,6 +1216,10 @@
   int defs = (ssa_rep != nullptr) ? ssa_rep->num_defs : 0;
   int uses = (ssa_rep != nullptr) ? ssa_rep->num_uses : 0;
 
+  if (opcode < kMirOpFirst) {
+    return;  // It is not an extended instruction.
+  }
+
   decoded_mir->append(extended_mir_op_names_[opcode - kMirOpFirst]);
 
   switch (opcode) {
@@ -1348,9 +1355,10 @@
           decoded_mir->append(", ");
           decoded_mir->append(GetSSANameWithConst(ssa_rep->defs[1], false));
         }
-        decoded_mir->append(StringPrintf(" = vect%d", mir->dalvikInsn.vB));
+        decoded_mir->append(StringPrintf(" = vect%d (extr_idx:%d)", mir->dalvikInsn.vB, mir->dalvikInsn.arg[0]));
       } else {
-        decoded_mir->append(StringPrintf(" v%d = vect%d", mir->dalvikInsn.vA, mir->dalvikInsn.vB));
+        decoded_mir->append(StringPrintf(" v%d = vect%d (extr_idx:%d)", mir->dalvikInsn.vA,
+                                         mir->dalvikInsn.vB, mir->dalvikInsn.arg[0]));
       }
       FillTypeSizeString(mir->dalvikInsn.vC, decoded_mir);
       break;
@@ -1681,9 +1689,9 @@
 void MIRGraph::SSATransformationStart() {
   DCHECK(temp_scoped_alloc_.get() == nullptr);
   temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
-  temp_bit_vector_size_ = GetNumOfCodeAndTempVRs();
-  temp_bit_vector_ = new (temp_scoped_alloc_.get()) ArenaBitVector(
-      temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapRegisterV);
+  temp_.ssa.num_vregs = GetNumOfCodeAndTempVRs();
+  temp_.ssa.work_live_vregs = new (temp_scoped_alloc_.get()) ArenaBitVector(
+      temp_scoped_alloc_.get(), temp_.ssa.num_vregs, false, kBitMapRegisterV);
 }
 
 void MIRGraph::SSATransformationEnd() {
@@ -1692,9 +1700,9 @@
     VerifyDataflow();
   }
 
-  temp_bit_vector_size_ = 0u;
-  temp_bit_vector_ = nullptr;
-  temp_bit_matrix_ = nullptr;  // Def block matrix.
+  temp_.ssa.num_vregs = 0u;
+  temp_.ssa.work_live_vregs = nullptr;
+  temp_.ssa.def_block_matrix = nullptr;
   DCHECK(temp_scoped_alloc_.get() != nullptr);
   temp_scoped_alloc_.reset();
 
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index a1d24e2..c8ea972 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -444,6 +444,11 @@
   void UpdatePredecessor(BasicBlockId old_pred, BasicBlockId new_pred);
 
   /**
+   * @brief Return first non-Phi insn.
+   */
+  MIR* GetFirstNonPhiInsn();
+
+  /**
    * @brief Used to obtain the next MIR that follows unconditionally.
    * @details The implementation does not guarantee that a MIR does not
    * follow even if this method returns nullptr.
@@ -661,13 +666,29 @@
   void DoCacheFieldLoweringInfo();
 
   const MirIFieldLoweringInfo& GetIFieldLoweringInfo(MIR* mir) const {
-    DCHECK_LT(mir->meta.ifield_lowering_info, ifield_lowering_infos_.size());
-    return ifield_lowering_infos_[mir->meta.ifield_lowering_info];
+    return GetIFieldLoweringInfo(mir->meta.ifield_lowering_info);
+  }
+
+  const MirIFieldLoweringInfo& GetIFieldLoweringInfo(uint32_t lowering_info) const {
+    DCHECK_LT(lowering_info, ifield_lowering_infos_.size());
+    return ifield_lowering_infos_[lowering_info];
+  }
+
+  size_t GetIFieldLoweringInfoCount() const {
+    return ifield_lowering_infos_.size();
   }
 
   const MirSFieldLoweringInfo& GetSFieldLoweringInfo(MIR* mir) const {
-    DCHECK_LT(mir->meta.sfield_lowering_info, sfield_lowering_infos_.size());
-    return sfield_lowering_infos_[mir->meta.sfield_lowering_info];
+    return GetSFieldLoweringInfo(mir->meta.sfield_lowering_info);
+  }
+
+  const MirSFieldLoweringInfo& GetSFieldLoweringInfo(uint32_t lowering_info) const {
+    DCHECK_LT(lowering_info, sfield_lowering_infos_.size());
+    return sfield_lowering_infos_[lowering_info];
+  }
+
+  size_t GetSFieldLoweringInfoCount() const {
+    return sfield_lowering_infos_.size();
   }
 
   void DoCacheMethodLoweringInfo();
@@ -1035,6 +1056,21 @@
   bool ApplyGlobalValueNumberingGate();
   bool ApplyGlobalValueNumbering(BasicBlock* bb);
   void ApplyGlobalValueNumberingEnd();
+
+  uint16_t GetGvnIFieldId(MIR* mir) const {
+    DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode));
+    DCHECK_LT(mir->meta.ifield_lowering_info, ifield_lowering_infos_.size());
+    DCHECK(temp_.gvn.ifield_ids_ != nullptr);
+    return temp_.gvn.ifield_ids_[mir->meta.ifield_lowering_info];
+  }
+
+  uint16_t GetGvnSFieldId(MIR* mir) const {
+    DCHECK(IsInstructionSGetOrSPut(mir->dalvikInsn.opcode));
+    DCHECK_LT(mir->meta.sfield_lowering_info, sfield_lowering_infos_.size());
+    DCHECK(temp_.gvn.sfield_ids_ != nullptr);
+    return temp_.gvn.sfield_ids_[mir->meta.sfield_lowering_info];
+  }
+
   /*
    * Type inference handling helpers.  Because Dalvik's bytecode is not fully typed,
    * we have to do some work to figure out the sreg type.  For some operations it is
@@ -1270,15 +1306,40 @@
   size_t max_nested_loops_;
   int* i_dom_list_;
   std::unique_ptr<ScopedArenaAllocator> temp_scoped_alloc_;
-  uint16_t* temp_insn_data_;
-  uint32_t temp_bit_vector_size_;
-  ArenaBitVector* temp_bit_vector_;
-  // temp_bit_matrix_ used as one of
-  //   - def_block_matrix: original num registers x num_blocks_,
-  //   - ending_null_check_matrix: num_blocks_ x original num registers,
-  //   - ending_clinit_check_matrix: num_blocks_ x unique class count.
-  ArenaBitVector** temp_bit_matrix_;
-  std::unique_ptr<GlobalValueNumbering> temp_gvn_;
+  // Union of temporaries used by different passes.
+  union {
+    // Class init check elimination.
+    struct {
+      size_t num_class_bits;  // 2 bits per class: class initialized and class in dex cache.
+      ArenaBitVector* work_classes_to_check;
+      ArenaBitVector** ending_classes_to_check_matrix;  // num_blocks_ x num_class_bits.
+      uint16_t* indexes;
+    } cice;
+    // Null check elimination.
+    struct {
+      size_t num_vregs;
+      ArenaBitVector* work_vregs_to_check;
+      ArenaBitVector** ending_vregs_to_check_matrix;  // num_blocks_ x num_vregs.
+    } nce;
+    // Special method inlining.
+    struct {
+      size_t num_indexes;
+      ArenaBitVector* processed_indexes;
+      uint16_t* lowering_infos;
+    } smi;
+    // SSA transformation.
+    struct {
+      size_t num_vregs;
+      ArenaBitVector* work_live_vregs;
+      ArenaBitVector** def_block_matrix;  // num_vregs x num_blocks_.
+    } ssa;
+    // Global value numbering.
+    struct {
+      GlobalValueNumbering* gvn;
+      uint16_t* ifield_ids_;  // Part of GVN/LVN but cached here for LVN to avoid recalculation.
+      uint16_t* sfield_ids_;  // Ditto.
+    } gvn;
+  } temp_;
   static const int kInvalidEntry = -1;
   ArenaVector<BasicBlock*> block_list_;
   ArenaBitVector* try_block_addr_;
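
The temp_ union above relies on only one optimization pass being active at a time, so every pass's scratch data can share the same storage; the matching constructor change in mir_graph.cc clears the whole union once with memset. A minimal sketch of that pattern, with invented pass structs, is shown here.

#include <cstddef>
#include <cstring>

// Invented, simplified scratch data; the real union holds the cice/nce/smi/ssa/gvn
// temporaries shown in the diff above.
class PassDriver {
 public:
  PassDriver() {
    memset(&temp_, 0, sizeof(temp_));  // One memset covers whichever member a pass uses later.
  }

  void RunNullCheckPass() {
    temp_.nce.num_vregs = 16u;  // Set up this pass's view of the shared scratch space.
    // ... pass body would go here ...
    temp_.nce.num_vregs = 0u;   // Clear before returning so the next pass starts from a clean state.
  }

  void RunSsaPass() {
    temp_.ssa.num_vregs = 16u;  // Reuses the same bytes the null check pass used.
    temp_.ssa.num_vregs = 0u;
  }

 private:
  union {
    struct { size_t num_vregs; } nce;  // Null check elimination scratch.
    struct { size_t num_vregs; } ssa;  // SSA transformation scratch.
  } temp_;
};

int main() {
  PassDriver driver;
  driver.RunNullCheckPass();
  driver.RunSsaPass();
  return 0;
}
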
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index a0ad213..1f630f7 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -19,6 +19,7 @@
 #include "dataflow_iterator-inl.h"
 #include "global_value_numbering.h"
 #include "local_value_numbering.h"
+#include "mir_field_info.h"
 #include "quick/dex_file_method_inliner.h"
 #include "quick/dex_file_to_method_inliner_map.h"
 #include "stack.h"
@@ -217,10 +218,6 @@
 static_assert(arraysize(kIfCcZConditionCodes) == Instruction::IF_LEZ - Instruction::IF_EQZ + 1,
               "if_ccz_ccodes_size1");
 
-static constexpr bool IsInstructionIfCcZ(Instruction::Code opcode) {
-  return Instruction::IF_EQZ <= opcode && opcode <= Instruction::IF_LEZ;
-}
-
 static constexpr ConditionCode ConditionCodeForIfCcZ(Instruction::Code opcode) {
   return kIfCcZConditionCodes[opcode - Instruction::IF_EQZ];
 }
@@ -480,29 +477,25 @@
             }
           }
           break;
-        case Instruction::GOTO:
-        case Instruction::GOTO_16:
-        case Instruction::GOTO_32:
-        case Instruction::IF_EQ:
-        case Instruction::IF_NE:
-        case Instruction::IF_LT:
-        case Instruction::IF_GE:
-        case Instruction::IF_GT:
-        case Instruction::IF_LE:
-        case Instruction::IF_EQZ:
-        case Instruction::IF_NEZ:
-        case Instruction::IF_LTZ:
-        case Instruction::IF_GEZ:
-        case Instruction::IF_GTZ:
-        case Instruction::IF_LEZ:
-          // If we've got a backwards branch to return, no need to suspend check.
-          if ((IsBackedge(bb, bb->taken) && GetBasicBlock(bb->taken)->dominates_return) ||
-              (IsBackedge(bb, bb->fall_through) &&
-                          GetBasicBlock(bb->fall_through)->dominates_return)) {
-            mir->optimization_flags |= MIR_IGNORE_SUSPEND_CHECK;
-            if (cu_->verbose) {
-              LOG(INFO) << "Suppressed suspend check on branch to return at 0x" << std::hex
-                        << mir->offset;
+        case Instruction::RETURN_VOID:
+        case Instruction::RETURN:
+        case Instruction::RETURN_WIDE:
+        case Instruction::RETURN_OBJECT:
+          if (bb->GetFirstNonPhiInsn() == mir) {
+            // This is a simple return BB. Eliminate suspend checks on predecessor back-edges.
+            for (BasicBlockId pred_id : bb->predecessors) {
+              BasicBlock* pred_bb = GetBasicBlock(pred_id);
+              DCHECK(pred_bb != nullptr);
+              if (IsBackedge(pred_bb, bb->id) && pred_bb->last_mir_insn != nullptr &&
+                  (IsInstructionIfCc(pred_bb->last_mir_insn->dalvikInsn.opcode) ||
+                   IsInstructionIfCcZ(pred_bb->last_mir_insn->dalvikInsn.opcode) ||
+                   IsInstructionGoto(pred_bb->last_mir_insn->dalvikInsn.opcode))) {
+                pred_bb->last_mir_insn->optimization_flags |= MIR_IGNORE_SUSPEND_CHECK;
+                if (cu_->verbose) {
+                  LOG(INFO) << "Suppressed suspend check on branch to return at 0x" << std::hex
+                            << pred_bb->last_mir_insn->offset;
+                }
+              }
             }
           }
           break;
@@ -801,17 +794,18 @@
     BasicBlock* bb_next = GetBasicBlock(bb->fall_through);
     DCHECK(!bb_next->catch_entry);
     DCHECK_EQ(bb_next->predecessors.size(), 1u);
-    // Overwrite the kMirOpCheck insn with the paired opcode.
+
+    // Now move instructions from bb_next to bb. Start with a sanity check that the
+    // throw instruction paired with the kMirOpCheck is the first one in bb_next.
     DCHECK_EQ(bb_next->first_mir_insn, throw_insn);
-    *bb->last_mir_insn = *throw_insn;
-    // And grab the rest of the instructions from bb_next.
-    bb->last_mir_insn = bb_next->last_mir_insn;
-    throw_insn->next = nullptr;
-    bb_next->last_mir_insn = throw_insn;
-    // Mark acquired instructions as belonging to bb.
-    for (MIR* insn = mir; insn != nullptr; insn = insn->next) {
-      insn->bb = bb->id;
-    }
+    // Move everything from the throw instruction to the last instruction of bb_next into bb.
+    MIR* last_to_move = bb_next->last_mir_insn;
+    bb_next->RemoveMIRList(throw_insn, last_to_move);
+    bb->InsertMIRListAfter(bb->last_mir_insn, throw_insn, last_to_move);
+    // The kMirOpCheck instruction is not needed anymore.
+    mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
+    bb->RemoveMIR(mir);
+
     // Before we overwrite successors, remove their predecessor links to bb.
     bb_next->ErasePredecessor(bb->id);
     if (bb->taken != NullBasicBlockId) {
@@ -891,12 +885,12 @@
 
   DCHECK(temp_scoped_alloc_.get() == nullptr);
   temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
-  temp_bit_vector_size_ = GetNumOfCodeVRs();
-  temp_bit_vector_ = new (temp_scoped_alloc_.get()) ArenaBitVector(
-      temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapNullCheck);
-  temp_bit_matrix_ = static_cast<ArenaBitVector**>(
+  temp_.nce.num_vregs = GetNumOfCodeAndTempVRs();
+  temp_.nce.work_vregs_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector(
+      temp_scoped_alloc_.get(), temp_.nce.num_vregs, false, kBitMapNullCheck);
+  temp_.nce.ending_vregs_to_check_matrix = static_cast<ArenaBitVector**>(
       temp_scoped_alloc_->Alloc(sizeof(ArenaBitVector*) * GetNumBlocks(), kArenaAllocMisc));
-  std::fill_n(temp_bit_matrix_, GetNumBlocks(), nullptr);
+  std::fill_n(temp_.nce.ending_vregs_to_check_matrix, GetNumBlocks(), nullptr);
 
   // reset MIR_MARK
   AllNodesIterator iter(this);
@@ -919,7 +913,7 @@
     return false;
   }
 
-  ArenaBitVector* vregs_to_check = temp_bit_vector_;
+  ArenaBitVector* vregs_to_check = temp_.nce.work_vregs_to_check;
   /*
    * Set initial state. Catch blocks don't need any special treatment.
    */
@@ -940,7 +934,7 @@
     // Starting state is union of all incoming arcs.
     bool copied_first = false;
     for (BasicBlockId pred_id : bb->predecessors) {
-      if (temp_bit_matrix_[pred_id] == nullptr) {
+      if (temp_.nce.ending_vregs_to_check_matrix[pred_id] == nullptr) {
         continue;
       }
       BasicBlock* pred_bb = GetBasicBlock(pred_id);
@@ -962,9 +956,9 @@
       }
       if (!copied_first) {
         copied_first = true;
-        vregs_to_check->Copy(temp_bit_matrix_[pred_id]);
+        vregs_to_check->Copy(temp_.nce.ending_vregs_to_check_matrix[pred_id]);
       } else {
-        vregs_to_check->Union(temp_bit_matrix_[pred_id]);
+        vregs_to_check->Union(temp_.nce.ending_vregs_to_check_matrix[pred_id]);
       }
       if (null_check_insn != nullptr) {
         vregs_to_check->ClearBit(null_check_insn->dalvikInsn.vA);
@@ -979,7 +973,10 @@
   for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
     uint64_t df_attributes = GetDataFlowAttributes(mir);
 
-    DCHECK_EQ(df_attributes & DF_NULL_TRANSFER_N, 0u);  // No Phis yet.
+    if ((df_attributes & DF_NULL_TRANSFER_N) != 0u) {
+      // The algorithm is written in a Phi-agnostic way; simply skip Phi insns here.
+      continue;
+    }
 
     // Might need a null check?
     if (df_attributes & DF_HAS_NULL_CHKS) {
@@ -1057,27 +1054,27 @@
 
   // Did anything change?
   bool nce_changed = false;
-  ArenaBitVector* old_ending_ssa_regs_to_check = temp_bit_matrix_[bb->id];
+  ArenaBitVector* old_ending_ssa_regs_to_check = temp_.nce.ending_vregs_to_check_matrix[bb->id];
   if (old_ending_ssa_regs_to_check == nullptr) {
     DCHECK(temp_scoped_alloc_.get() != nullptr);
     nce_changed = vregs_to_check->GetHighestBitSet() != -1;
-    temp_bit_matrix_[bb->id] = vregs_to_check;
+    temp_.nce.ending_vregs_to_check_matrix[bb->id] = vregs_to_check;
     // Create a new vregs_to_check for next BB.
-    temp_bit_vector_ = new (temp_scoped_alloc_.get()) ArenaBitVector(
-        temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapNullCheck);
+    temp_.nce.work_vregs_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector(
+        temp_scoped_alloc_.get(), temp_.nce.num_vregs, false, kBitMapNullCheck);
   } else if (!vregs_to_check->SameBitsSet(old_ending_ssa_regs_to_check)) {
     nce_changed = true;
-    temp_bit_matrix_[bb->id] = vregs_to_check;
-    temp_bit_vector_ = old_ending_ssa_regs_to_check;  // Reuse for vregs_to_check for next BB.
+    temp_.nce.ending_vregs_to_check_matrix[bb->id] = vregs_to_check;
+    temp_.nce.work_vregs_to_check = old_ending_ssa_regs_to_check;  // Reuse for next BB.
   }
   return nce_changed;
 }
 
 void MIRGraph::EliminateNullChecksEnd() {
   // Clean up temporaries.
-  temp_bit_vector_size_ = 0u;
-  temp_bit_vector_ = nullptr;
-  temp_bit_matrix_ = nullptr;
+  temp_.nce.num_vregs = 0u;
+  temp_.nce.work_vregs_to_check = nullptr;
+  temp_.nce.ending_vregs_to_check_matrix = nullptr;
   DCHECK(temp_scoped_alloc_.get() != nullptr);
   temp_scoped_alloc_.reset();
 
@@ -1124,9 +1121,9 @@
 
   // Each insn we use here has at least 2 code units, offset/2 will be a unique index.
   const size_t end = (GetNumDalvikInsns() + 1u) / 2u;
-  temp_insn_data_ = static_cast<uint16_t*>(
-      temp_scoped_alloc_->Alloc(end * sizeof(*temp_insn_data_), kArenaAllocGrowableArray));
-  std::fill_n(temp_insn_data_, end, 0xffffu);
+  temp_.cice.indexes = static_cast<uint16_t*>(
+      temp_scoped_alloc_->Alloc(end * sizeof(*temp_.cice.indexes), kArenaAllocGrowableArray));
+  std::fill_n(temp_.cice.indexes, end, 0xffffu);
 
   uint32_t unique_class_count = 0u;
   {
@@ -1159,8 +1156,7 @@
     for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
       if (bb->block_type == kDalvikByteCode) {
         for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-          if (mir->dalvikInsn.opcode >= Instruction::SGET &&
-              mir->dalvikInsn.opcode <= Instruction::SPUT_SHORT) {
+          if (IsInstructionSGetOrSPut(mir->dalvikInsn.opcode)) {
             const MirSFieldLoweringInfo& field_info = GetSFieldLoweringInfo(mir);
             if (!field_info.IsReferrersClass()) {
               DCHECK_LT(class_to_index_map.size(), 0xffffu);
@@ -1173,11 +1169,10 @@
                   static_cast<uint16_t>(class_to_index_map.size())
               };
               uint16_t index = class_to_index_map.insert(entry).first->index;
-              // Using offset/2 for index into temp_insn_data_.
-              temp_insn_data_[mir->offset / 2u] = index;
+              // Using offset/2 for index into temp_.cice.indexes.
+              temp_.cice.indexes[mir->offset / 2u] = index;
             }
-          } else if (mir->dalvikInsn.opcode == Instruction::INVOKE_STATIC ||
-              mir->dalvikInsn.opcode == Instruction::INVOKE_STATIC_RANGE) {
+          } else if (IsInstructionInvokeStatic(mir->dalvikInsn.opcode)) {
             const MirMethodLoweringInfo& method_info = GetMethodLoweringInfo(mir);
             DCHECK(method_info.IsStatic());
             if (method_info.FastPath() && !method_info.IsReferrersClass()) {
@@ -1187,8 +1182,8 @@
                   static_cast<uint16_t>(class_to_index_map.size())
               };
               uint16_t index = class_to_index_map.insert(entry).first->index;
-              // Using offset/2 for index into temp_insn_data_.
-              temp_insn_data_[mir->offset / 2u] = index;
+              // Using offset/2 for index into temp_.cice.indexes.
+              temp_.cice.indexes[mir->offset / 2u] = index;
             }
           }
         }
@@ -1199,19 +1194,19 @@
 
   if (unique_class_count == 0u) {
     // All SGET/SPUTs refer to initialized classes. Nothing to do.
-    temp_insn_data_ = nullptr;
+    temp_.cice.indexes = nullptr;
     temp_scoped_alloc_.reset();
     return false;
   }
 
   // 2 bits for each class: is class initialized, is class in dex cache.
-  temp_bit_vector_size_ = 2u * unique_class_count;
-  temp_bit_vector_ = new (temp_scoped_alloc_.get()) ArenaBitVector(
-      temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapClInitCheck);
-  temp_bit_matrix_ = static_cast<ArenaBitVector**>(
+  temp_.cice.num_class_bits = 2u * unique_class_count;
+  temp_.cice.work_classes_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector(
+      temp_scoped_alloc_.get(), temp_.cice.num_class_bits, false, kBitMapClInitCheck);
+  temp_.cice.ending_classes_to_check_matrix = static_cast<ArenaBitVector**>(
       temp_scoped_alloc_->Alloc(sizeof(ArenaBitVector*) * GetNumBlocks(), kArenaAllocMisc));
-  std::fill_n(temp_bit_matrix_, GetNumBlocks(), nullptr);
-  DCHECK_GT(temp_bit_vector_size_, 0u);
+  std::fill_n(temp_.cice.ending_classes_to_check_matrix, GetNumBlocks(), nullptr);
+  DCHECK_GT(temp_.cice.num_class_bits, 0u);
   return true;
 }
 
@@ -1229,22 +1224,22 @@
   /*
    * Set initial state.  Catch blocks don't need any special treatment.
    */
-  ArenaBitVector* classes_to_check = temp_bit_vector_;
+  ArenaBitVector* classes_to_check = temp_.cice.work_classes_to_check;
   DCHECK(classes_to_check != nullptr);
   if (bb->block_type == kEntryBlock) {
-    classes_to_check->SetInitialBits(temp_bit_vector_size_);
+    classes_to_check->SetInitialBits(temp_.cice.num_class_bits);
   } else {
     // Starting state is union of all incoming arcs.
     bool copied_first = false;
     for (BasicBlockId pred_id : bb->predecessors) {
-      if (temp_bit_matrix_[pred_id] == nullptr) {
+      if (temp_.cice.ending_classes_to_check_matrix[pred_id] == nullptr) {
         continue;
       }
       if (!copied_first) {
         copied_first = true;
-        classes_to_check->Copy(temp_bit_matrix_[pred_id]);
+        classes_to_check->Copy(temp_.cice.ending_classes_to_check_matrix[pred_id]);
       } else {
-        classes_to_check->Union(temp_bit_matrix_[pred_id]);
+        classes_to_check->Union(temp_.cice.ending_classes_to_check_matrix[pred_id]);
       }
     }
     DCHECK(copied_first);  // At least one predecessor must have been processed before this bb.
@@ -1253,7 +1248,7 @@
 
   // Walk through the instruction in the block, updating as necessary
   for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-    uint16_t index = temp_insn_data_[mir->offset / 2u];
+    uint16_t index = temp_.cice.indexes[mir->offset / 2u];
     if (index != 0xffffu) {
       bool check_initialization = false;
       bool check_dex_cache = false;
@@ -1261,12 +1256,10 @@
       // NOTE: index != 0xffff does not guarantee that this is an SGET/SPUT/INVOKE_STATIC.
       // Dex instructions with width 1 can have the same offset/2.
 
-      if (mir->dalvikInsn.opcode >= Instruction::SGET &&
-          mir->dalvikInsn.opcode <= Instruction::SPUT_SHORT) {
+      if (IsInstructionSGetOrSPut(mir->dalvikInsn.opcode)) {
         check_initialization = true;
         check_dex_cache = true;
-      } else if (mir->dalvikInsn.opcode == Instruction::INVOKE_STATIC ||
-               mir->dalvikInsn.opcode == Instruction::INVOKE_STATIC_RANGE) {
+      } else if (IsInstructionInvokeStatic(mir->dalvikInsn.opcode)) {
         check_initialization = true;
         // NOTE: INVOKE_STATIC doesn't guarantee that the type will be in the dex cache.
       }
@@ -1299,29 +1292,29 @@
 
   // Did anything change?
   bool changed = false;
-  ArenaBitVector* old_ending_classes_to_check = temp_bit_matrix_[bb->id];
+  ArenaBitVector* old_ending_classes_to_check = temp_.cice.ending_classes_to_check_matrix[bb->id];
   if (old_ending_classes_to_check == nullptr) {
     DCHECK(temp_scoped_alloc_.get() != nullptr);
     changed = classes_to_check->GetHighestBitSet() != -1;
-    temp_bit_matrix_[bb->id] = classes_to_check;
+    temp_.cice.ending_classes_to_check_matrix[bb->id] = classes_to_check;
     // Create a new classes_to_check for next BB.
-    temp_bit_vector_ = new (temp_scoped_alloc_.get()) ArenaBitVector(
-        temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapClInitCheck);
+    temp_.cice.work_classes_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector(
+        temp_scoped_alloc_.get(), temp_.cice.num_class_bits, false, kBitMapClInitCheck);
   } else if (!classes_to_check->Equal(old_ending_classes_to_check)) {
     changed = true;
-    temp_bit_matrix_[bb->id] = classes_to_check;
-    temp_bit_vector_ = old_ending_classes_to_check;  // Reuse for classes_to_check for next BB.
+    temp_.cice.ending_classes_to_check_matrix[bb->id] = classes_to_check;
+    temp_.cice.work_classes_to_check = old_ending_classes_to_check;  // Reuse for next BB.
   }
   return changed;
 }
 
 void MIRGraph::EliminateClassInitChecksEnd() {
   // Clean up temporaries.
-  temp_bit_vector_size_ = 0u;
-  temp_bit_vector_ = nullptr;
-  temp_bit_matrix_ = nullptr;
-  DCHECK(temp_insn_data_ != nullptr);
-  temp_insn_data_ = nullptr;
+  temp_.cice.num_class_bits = 0u;
+  temp_.cice.work_classes_to_check = nullptr;
+  temp_.cice.ending_classes_to_check_matrix = nullptr;
+  DCHECK(temp_.cice.indexes != nullptr);
+  temp_.cice.indexes = nullptr;
   DCHECK(temp_scoped_alloc_.get() != nullptr);
   temp_scoped_alloc_.reset();
 }
@@ -1333,39 +1326,43 @@
 
   DCHECK(temp_scoped_alloc_ == nullptr);
   temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
-  DCHECK(temp_gvn_ == nullptr);
-  temp_gvn_.reset(
-      new (temp_scoped_alloc_.get()) GlobalValueNumbering(cu_, temp_scoped_alloc_.get(),
-                                                          GlobalValueNumbering::kModeGvn));
+  temp_.gvn.ifield_ids_ =
+      GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), ifield_lowering_infos_);
+  temp_.gvn.sfield_ids_ =
+      GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), sfield_lowering_infos_);
+  DCHECK(temp_.gvn.gvn == nullptr);
+  temp_.gvn.gvn = new (temp_scoped_alloc_.get()) GlobalValueNumbering(
+      cu_, temp_scoped_alloc_.get(), GlobalValueNumbering::kModeGvn);
   return true;
 }
 
 bool MIRGraph::ApplyGlobalValueNumbering(BasicBlock* bb) {
-  DCHECK(temp_gvn_ != nullptr);
-  LocalValueNumbering* lvn = temp_gvn_->PrepareBasicBlock(bb);
+  DCHECK(temp_.gvn.gvn != nullptr);
+  LocalValueNumbering* lvn = temp_.gvn.gvn->PrepareBasicBlock(bb);
   if (lvn != nullptr) {
     for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
       lvn->GetValueNumber(mir);
     }
   }
-  bool change = (lvn != nullptr) && temp_gvn_->FinishBasicBlock(bb);
+  bool change = (lvn != nullptr) && temp_.gvn.gvn->FinishBasicBlock(bb);
   return change;
 }
 
 void MIRGraph::ApplyGlobalValueNumberingEnd() {
   // Perform modifications.
-  if (temp_gvn_->Good()) {
+  DCHECK(temp_.gvn.gvn != nullptr);
+  if (temp_.gvn.gvn->Good()) {
     if (max_nested_loops_ != 0u) {
-      temp_gvn_->StartPostProcessing();
+      temp_.gvn.gvn->StartPostProcessing();
       TopologicalSortIterator iter(this);
       for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
         ScopedArenaAllocator allocator(&cu_->arena_stack);  // Reclaim memory after each LVN.
-        LocalValueNumbering* lvn = temp_gvn_->PrepareBasicBlock(bb, &allocator);
+        LocalValueNumbering* lvn = temp_.gvn.gvn->PrepareBasicBlock(bb, &allocator);
         if (lvn != nullptr) {
           for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
             lvn->GetValueNumber(mir);
           }
-          bool change = temp_gvn_->FinishBasicBlock(bb);
+          bool change = temp_.gvn.gvn->FinishBasicBlock(bb);
           DCHECK(!change) << PrettyMethod(cu_->method_idx, *cu_->dex_file);
         }
       }
@@ -1376,16 +1373,18 @@
     LOG(WARNING) << "GVN failed for " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
   }
 
-  DCHECK(temp_gvn_ != nullptr);
-  temp_gvn_.reset();
+  delete temp_.gvn.gvn;
+  temp_.gvn.gvn = nullptr;
+  temp_.gvn.ifield_ids_ = nullptr;
+  temp_.gvn.sfield_ids_ = nullptr;
   DCHECK(temp_scoped_alloc_ != nullptr);
   temp_scoped_alloc_.reset();
 }
 
 void MIRGraph::ComputeInlineIFieldLoweringInfo(uint16_t field_idx, MIR* invoke, MIR* iget_or_iput) {
   uint32_t method_index = invoke->meta.method_lowering_info;
-  if (temp_bit_vector_->IsBitSet(method_index)) {
-    iget_or_iput->meta.ifield_lowering_info = temp_insn_data_[method_index];
+  if (temp_.smi.processed_indexes->IsBitSet(method_index)) {
+    iget_or_iput->meta.ifield_lowering_info = temp_.smi.lowering_infos[method_index];
     DCHECK_EQ(field_idx, GetIFieldLoweringInfo(iget_or_iput).FieldIndex());
     return;
   }
@@ -1396,14 +1395,15 @@
       cu_, cu_->class_loader, cu_->class_linker, *target.dex_file,
       nullptr /* code_item not used */, 0u /* class_def_idx not used */, target.dex_method_index,
       0u /* access_flags not used */, nullptr /* verified_method not used */);
-  MirIFieldLoweringInfo inlined_field_info(field_idx);
+  DexMemAccessType type = IGetOrIPutMemAccessType(iget_or_iput->dalvikInsn.opcode);
+  MirIFieldLoweringInfo inlined_field_info(field_idx, type);
   MirIFieldLoweringInfo::Resolve(cu_->compiler_driver, &inlined_unit, &inlined_field_info, 1u);
   DCHECK(inlined_field_info.IsResolved());
 
   uint32_t field_info_index = ifield_lowering_infos_.size();
   ifield_lowering_infos_.push_back(inlined_field_info);
-  temp_bit_vector_->SetBit(method_index);
-  temp_insn_data_[method_index] = field_info_index;
+  temp_.smi.processed_indexes->SetBit(method_index);
+  temp_.smi.lowering_infos[method_index] = field_info_index;
   iget_or_iput->meta.ifield_lowering_info = field_info_index;
 }
 
@@ -1425,12 +1425,12 @@
 
   DCHECK(temp_scoped_alloc_.get() == nullptr);
   temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
-  temp_bit_vector_size_ = method_lowering_infos_.size();
-  temp_bit_vector_ = new (temp_scoped_alloc_.get()) ArenaBitVector(
-      temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapMisc);
-  temp_bit_vector_->ClearAllBits();
-  temp_insn_data_ = static_cast<uint16_t*>(temp_scoped_alloc_->Alloc(
-      temp_bit_vector_size_ * sizeof(*temp_insn_data_), kArenaAllocGrowableArray));
+  temp_.smi.num_indexes = method_lowering_infos_.size();
+  temp_.smi.processed_indexes = new (temp_scoped_alloc_.get()) ArenaBitVector(
+      temp_scoped_alloc_.get(), temp_.smi.num_indexes, false, kBitMapMisc);
+  temp_.smi.processed_indexes->ClearAllBits();
+  temp_.smi.lowering_infos = static_cast<uint16_t*>(temp_scoped_alloc_->Alloc(
+      temp_.smi.num_indexes * sizeof(*temp_.smi.lowering_infos), kArenaAllocGrowableArray));
 }
 
 void MIRGraph::InlineSpecialMethods(BasicBlock* bb) {
@@ -1477,10 +1477,12 @@
 }
 
 void MIRGraph::InlineSpecialMethodsEnd() {
-  DCHECK(temp_insn_data_ != nullptr);
-  temp_insn_data_ = nullptr;
-  DCHECK(temp_bit_vector_ != nullptr);
-  temp_bit_vector_ = nullptr;
+  // Clean up temporaries.
+  DCHECK(temp_.smi.lowering_infos != nullptr);
+  temp_.smi.lowering_infos = nullptr;
+  temp_.smi.num_indexes = 0u;
+  DCHECK(temp_.smi.processed_indexes != nullptr);
+  temp_.smi.processed_indexes = nullptr;
   DCHECK(temp_scoped_alloc_.get() != nullptr);
   temp_scoped_alloc_.reset();
 }
@@ -1542,6 +1544,14 @@
 }
 
 void MIRGraph::BasicBlockOptimization() {
+  if ((cu_->disable_opt & (1 << kLocalValueNumbering)) == 0) {
+    temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
+    temp_.gvn.ifield_ids_ =
+        GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), ifield_lowering_infos_);
+    temp_.gvn.sfield_ids_ =
+        GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), sfield_lowering_infos_);
+  }
+
   if ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) {
     ClearAllVisitedFlags();
     PreOrderDfsIterator iter2(this);
@@ -1558,6 +1568,11 @@
       BasicBlockOpt(bb);
     }
   }
+
+  // Clean up after LVN.
+  temp_.gvn.ifield_ids_ = nullptr;
+  temp_.gvn.sfield_ids_ = nullptr;
+  temp_scoped_alloc_.reset();
 }
 
 }  // namespace art
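
The class-init-check elimination above reserves two bits per unique class ("is class initialized" and "is class in dex cache"). A small sketch of one plausible per-class encoding follows; the exact bit order within each pair is an assumption here, not something the diff pins down.

#include <bitset>
#include <cassert>
#include <cstddef>

// Assumed layout: for class index c, bit 2*c = "needs init check",
// bit 2*c + 1 = "needs dex-cache check". Both start set and are cleared
// once the corresponding check has been emitted.
constexpr size_t kMaxClasses = 32;

class ClassCheckState {
 public:
  explicit ClassCheckState(size_t unique_class_count) {
    for (size_t c = 0; c != unique_class_count; ++c) {
      bits_.set(2 * c);
      bits_.set(2 * c + 1);
    }
  }

  bool NeedsInitCheck(size_t class_index) const { return bits_.test(2 * class_index); }
  bool NeedsDexCacheCheck(size_t class_index) const { return bits_.test(2 * class_index + 1); }

  // Called after an SGET/SPUT has performed both checks for this class.
  void MarkChecked(size_t class_index) {
    bits_.reset(2 * class_index);
    bits_.reset(2 * class_index + 1);
  }

 private:
  std::bitset<2 * kMaxClasses> bits_;
};

int main() {
  ClassCheckState state(/* unique_class_count= */ 3);
  assert(state.NeedsInitCheck(1));
  state.MarkChecked(1);
  assert(!state.NeedsInitCheck(1) && !state.NeedsDexCacheCheck(1));
  assert(state.NeedsInitCheck(2));  // Other classes unaffected.
  return 0;
}
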
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 8874faf..c794cc6 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -19,6 +19,7 @@
 #include "compiler_internals.h"
 #include "dataflow_iterator.h"
 #include "dataflow_iterator-inl.h"
+#include "dex/mir_field_info.h"
 #include "gtest/gtest.h"
 
 namespace art {
@@ -236,15 +237,17 @@
       ASSERT_LT(def->bbid, cu_.mir_graph->block_list_.size());
       BasicBlock* bb = cu_.mir_graph->block_list_[def->bbid];
       bb->AppendMIR(mir);
-      if (def->opcode >= Instruction::SGET && def->opcode <= Instruction::SPUT_SHORT) {
-        ASSERT_LT(def->field_or_method_info, cu_.mir_graph->sfield_lowering_infos_.size());
-        mir->meta.sfield_lowering_info = def->field_or_method_info;
-      } else if (def->opcode >= Instruction::IGET && def->opcode <= Instruction::IPUT_SHORT) {
+      if (IsInstructionIGetOrIPut(def->opcode)) {
         ASSERT_LT(def->field_or_method_info, cu_.mir_graph->ifield_lowering_infos_.size());
         mir->meta.ifield_lowering_info = def->field_or_method_info;
-      } else if (def->opcode >= Instruction::INVOKE_VIRTUAL &&
-          def->opcode < Instruction::INVOKE_INTERFACE_RANGE &&
-          def->opcode != Instruction::RETURN_VOID_BARRIER) {
+        ASSERT_EQ(cu_.mir_graph->ifield_lowering_infos_[def->field_or_method_info].MemAccessType(),
+                  IGetOrIPutMemAccessType(def->opcode));
+      } else if (IsInstructionSGetOrSPut(def->opcode)) {
+        ASSERT_LT(def->field_or_method_info, cu_.mir_graph->sfield_lowering_infos_.size());
+        mir->meta.sfield_lowering_info = def->field_or_method_info;
+        ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_or_method_info].MemAccessType(),
+                  SGetOrSPutMemAccessType(def->opcode));
+      } else if (IsInstructionInvoke(def->opcode)) {
         ASSERT_LT(def->field_or_method_info, cu_.mir_graph->method_lowering_infos_.size());
         mir->meta.method_lowering_info = def->field_or_method_info;
       }
@@ -294,6 +297,7 @@
     uintptr_t declaring_dex_file;
     uint16_t declaring_class_idx;
     uint16_t declaring_field_idx;
+    DexMemAccessType type;
   };
 
   void DoPrepareSFields(const SFieldDef* defs, size_t count) {
@@ -301,12 +305,12 @@
     cu_.mir_graph->sfield_lowering_infos_.reserve(count);
     for (size_t i = 0u; i != count; ++i) {
       const SFieldDef* def = &defs[i];
-      MirSFieldLoweringInfo field_info(def->field_idx);
+      MirSFieldLoweringInfo field_info(def->field_idx, def->type);
       if (def->declaring_dex_file != 0u) {
         field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
         field_info.declaring_class_idx_ = def->declaring_class_idx;
         field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ = MirSFieldLoweringInfo::kFlagIsStatic;
+        // We don't care about the volatile flag in these tests.
       }
       ASSERT_EQ(def->declaring_dex_file != 0u, field_info.IsResolved());
       ASSERT_FALSE(field_info.IsClassInitialized());
@@ -343,6 +347,7 @@
     uintptr_t declaring_dex_file;
     uint16_t declaring_class_idx;
     uint16_t declaring_field_idx;
+    DexMemAccessType type;
   };
 
   void DoPrepareIFields(const IFieldDef* defs, size_t count) {
@@ -350,11 +355,12 @@
     cu_.mir_graph->ifield_lowering_infos_.reserve(count);
     for (size_t i = 0u; i != count; ++i) {
       const IFieldDef* def = &defs[i];
-      MirIFieldLoweringInfo field_info(def->field_idx);
+      MirIFieldLoweringInfo field_info(def->field_idx, def->type);
       if (def->declaring_dex_file != 0u) {
         field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
         field_info.declaring_class_idx_ = def->declaring_class_idx;
         field_info.declaring_field_idx_ = def->declaring_field_idx;
+        // We don't care about the volatile flag in these tests.
       }
       ASSERT_EQ(def->declaring_dex_file != 0u, field_info.IsResolved());
       cu_.mir_graph->ifield_lowering_infos_.push_back(field_info);
@@ -393,12 +399,12 @@
 
 TEST_F(ClassInitCheckEliminationTest, SingleBlock) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
-      { 2u, 1u, 2u, 2u },
-      { 3u, 1u, 3u, 3u },  // Same declaring class as sfield[4].
-      { 4u, 1u, 3u, 4u },  // Same declaring class as sfield[3].
-      { 5u, 0u, 0u, 0u },  // Unresolved.
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
+      { 3u, 1u, 3u, 3u, kDexMemAccessWord },  // Same declaring class as sfield[4].
+      { 4u, 1u, 3u, 4u, kDexMemAccessWord },  // Same declaring class as sfield[3].
+      { 5u, 0u, 0u, 0u, kDexMemAccessWord },  // Unresolved.
   };
   static const MIRDef mirs[] = {
       DEF_SGET_SPUT(3u, Instruction::SPUT, 0u, 5u),  // Unresolved.
@@ -432,9 +438,9 @@
 
 TEST_F(ClassInitCheckEliminationTest, SingleBlockWithInvokes) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
-      { 2u, 1u, 2u, 2u },
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
   };
   static const MethodDef methods[] = {
       { 0u, 1u, 0u, 0u, kStatic, kStatic, false, false },
@@ -473,17 +479,17 @@
 
 TEST_F(ClassInitCheckEliminationTest, Diamond) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
-      { 2u, 1u, 2u, 2u },
-      { 3u, 1u, 3u, 3u },
-      { 4u, 1u, 4u, 4u },
-      { 5u, 1u, 5u, 5u },
-      { 6u, 1u, 6u, 6u },
-      { 7u, 1u, 7u, 7u },
-      { 8u, 1u, 8u, 8u },  // Same declaring class as sfield[9].
-      { 9u, 1u, 8u, 9u },  // Same declaring class as sfield[8].
-      { 10u, 0u, 0u, 0u },  // Unresolved.
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
+      { 3u, 1u, 3u, 3u, kDexMemAccessWord },
+      { 4u, 1u, 4u, 4u, kDexMemAccessWord },
+      { 5u, 1u, 5u, 5u, kDexMemAccessWord },
+      { 6u, 1u, 6u, 6u, kDexMemAccessWord },
+      { 7u, 1u, 7u, 7u, kDexMemAccessWord },
+      { 8u, 1u, 8u, 8u, kDexMemAccessWord },   // Same declaring class as sfield[9].
+      { 9u, 1u, 8u, 9u, kDexMemAccessWord },   // Same declaring class as sfield[8].
+      { 10u, 0u, 0u, 0u, kDexMemAccessWord },  // Unresolved.
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -539,11 +545,11 @@
 
 TEST_F(ClassInitCheckEliminationTest, DiamondWithInvokes) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
-      { 2u, 1u, 2u, 2u },
-      { 3u, 1u, 3u, 3u },
-      { 4u, 1u, 4u, 4u },
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
+      { 3u, 1u, 3u, 3u, kDexMemAccessWord },
+      { 4u, 1u, 4u, 4u, kDexMemAccessWord },
   };
   static const MethodDef methods[] = {
       { 0u, 1u, 0u, 0u, kStatic, kStatic, false, false },
@@ -600,9 +606,9 @@
 
 TEST_F(ClassInitCheckEliminationTest, Loop) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
-      { 2u, 1u, 2u, 2u },
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 0u),
@@ -631,7 +637,7 @@
 
 TEST_F(ClassInitCheckEliminationTest, LoopWithInvokes) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u },
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
   };
   static const MethodDef methods[] = {
       { 0u, 1u, 0u, 0u, kStatic, kStatic, false, false },
@@ -671,10 +677,10 @@
 
 TEST_F(ClassInitCheckEliminationTest, Catch) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
-      { 2u, 1u, 2u, 2u },
-      { 3u, 1u, 3u, 3u },
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
+      { 3u, 1u, 3u, 3u, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 0u),  // Before the exception edge.
@@ -707,9 +713,9 @@
 
 TEST_F(NullCheckEliminationTest, SingleBlock) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 0u, 1u },
-      { 2u, 1u, 0u, 2u },  // Object.
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 0u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 0u, 2u, kDexMemAccessObject },
   };
   static const MIRDef mirs[] = {
       DEF_IGET_IPUT(3u, Instruction::IGET_OBJECT, 0u, 100u, 2u),
@@ -768,9 +774,9 @@
 
 TEST_F(NullCheckEliminationTest, Diamond) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 0u, 1u },
-      { 2u, 1u, 0u, 2u },  // int[].
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 0u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 0u, 2u, kDexMemAccessObject },  // int[].
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -816,8 +822,8 @@
 
 TEST_F(NullCheckEliminationTest, Loop) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_IGET_IPUT(3u, Instruction::IGET, 0u, 100u, 0u),
@@ -846,8 +852,8 @@
 
 TEST_F(NullCheckEliminationTest, Catch) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_IGET_IPUT(3u, Instruction::IGET, 0u, 100u, 0u),  // Before the exception edge.
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index a3b4df3..f15d707 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -288,18 +288,12 @@
   StoreValue(rl_dest, rl_result);
 }
 
-/*
- * Mark garbage collection card. Skip if the value we're storing is null.
- */
-void ArmMir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
+void ArmMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) {
   RegStorage reg_card_base = AllocTemp();
   RegStorage reg_card_no = AllocTemp();
-  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
   LoadWordDisp(rs_rARM_SELF, Thread::CardTableOffset<4>().Int32Value(), reg_card_base);
   OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
   StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  branch_over->target = target;
   FreeTemp(reg_card_base);
   FreeTemp(reg_card_no);
 }
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index d235199..e8d0c32 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -106,7 +106,9 @@
                        OpSize size, VolatileKind is_volatile) OVERRIDE;
     LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
                           OpSize size) OVERRIDE;
-    void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+
+    /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage)
+    void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE;
 
     // Required for target - register utilities.
     RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 3e5b7bf..089e4b6 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -251,20 +251,14 @@
   StoreValue(rl_dest, rl_result);
 }
 
-/*
- * Mark garbage collection card. Skip if the value we're storing is null.
- */
-void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
+void Arm64Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) {
   RegStorage reg_card_base = AllocTempWide();
   RegStorage reg_card_no = AllocTempWide();  // Needs to be wide as addr is ref=64b
-  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
   LoadWordDisp(rs_xSELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base);
   OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
   // TODO(Arm64): generate "strb wB, [xB, wC, uxtw]" rather than "strb wB, [xB, xC]"?
   StoreBaseIndexed(reg_card_base, reg_card_no, As32BitReg(reg_card_base),
                    0, kUnsignedByte);
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  branch_over->target = target;
   FreeTemp(reg_card_base);
   FreeTemp(reg_card_no);
 }
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 5182a89..5e10f80 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -94,7 +94,10 @@
   LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
                         OpSize size) OVERRIDE;
   LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale) OVERRIDE;
-  void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) OVERRIDE;
+
+  /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage)
+  void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE;
+
   LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
                          int offset, int check_value, LIR* target, LIR** compare) OVERRIDE;
 
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 9403516..80cb535 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -314,6 +314,15 @@
   }
 }
 
+void Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
+  DCHECK(val_reg.Valid());
+  DCHECK_EQ(val_reg.Is64Bit(), cu_->target64);
+  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, nullptr);
+  UnconditionallyMarkGCCard(tgt_addr_reg);
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+  branch_over->target = target;
+}
+
 /* Dump instructions and constant pool contents */
 void Mir2Lir::CodegenDump() {
   LOG(INFO) << "Dumping LIR insns for "
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 98ddc36..4dd24cb 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -416,8 +416,8 @@
   // share array alignment with ints (see comment at head of function)
   size_t component_size = sizeof(int32_t);
 
-  // Having a range of 0 is legal
-  if (info->is_range && (elems > 0)) {
+  if (elems > 5) {
+    DCHECK(info->is_range);  // Non-range insn can't encode more than 5 elems.
     /*
      * Bit of ugliness here.  We're going generate a mem copy loop
      * on the register range, but it is possible that some regs
@@ -487,7 +487,11 @@
       OpRegRegImm(kOpAdd, ref_reg, r_dst,
                   -mirror::Array::DataOffset(component_size).Int32Value());
     }
-  } else if (!info->is_range) {
+    FreeTemp(r_idx);
+    FreeTemp(r_dst);
+    FreeTemp(r_src);
+  } else {
+    DCHECK_LE(elems, 5);  // Usually, but not necessarily, a non-range instruction.
     // TUNING: interleave
     for (int i = 0; i < elems; i++) {
       RegLocation rl_arg;
@@ -507,6 +511,15 @@
       }
     }
   }
+  if (elems != 0 && info->args[0].ref) {
+    // If there is at least one potentially non-null value, unconditionally mark the GC card.
+    for (int i = 0; i < elems; i++) {
+      if (!mir_graph_->IsConstantNullRef(info->args[i])) {
+        UnconditionallyMarkGCCard(ref_reg);
+        break;
+      }
+    }
+  }
   if (info->result.location != kLocInvalid) {
     StoreValue(info->result, GetReturn(kRefReg));
   }
@@ -570,6 +583,7 @@
 
 void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) {
   const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
+  DCHECK_EQ(SPutMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
   cu_->compiler_driver->ProcessedStaticField(field_info.FastPut(), field_info.IsReferrersClass());
   if (!SLOW_FIELD_PATH && field_info.FastPut()) {
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
@@ -688,6 +702,7 @@
 
 void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, OpSize size, Primitive::Type type) {
   const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
+  DCHECK_EQ(SGetMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
   cu_->compiler_driver->ProcessedStaticField(field_info.FastGet(), field_info.IsReferrersClass());
 
   if (!SLOW_FIELD_PATH && field_info.FastGet()) {
@@ -826,6 +841,7 @@
 void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size, Primitive::Type type,
                       RegLocation rl_dest, RegLocation rl_obj) {
   const MirIFieldLoweringInfo& field_info = mir_graph_->GetIFieldLoweringInfo(mir);
+  DCHECK_EQ(IGetMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
   cu_->compiler_driver->ProcessedInstanceField(field_info.FastGet());
   if (!SLOW_FIELD_PATH && field_info.FastGet()) {
     RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
@@ -899,6 +915,7 @@
 void Mir2Lir::GenIPut(MIR* mir, int opt_flags, OpSize size,
                       RegLocation rl_src, RegLocation rl_obj) {
   const MirIFieldLoweringInfo& field_info = mir_graph_->GetIFieldLoweringInfo(mir);
+  DCHECK_EQ(IPutMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
   cu_->compiler_driver->ProcessedInstanceField(field_info.FastPut());
   if (!SLOW_FIELD_PATH && field_info.FastPut()) {
     RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
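
The DCHECK_EQs added to GenSput/GenSget/GenIGet/GenIPut cross-check the memory access type decoded from the opcode against the type cached in the MIR field lowering info. An illustration of the decoded values, using the constexpr helpers that compiler/utils/dex_instruction_utils.h introduces later in this patch (a sketch only; the include paths are assumptions):

    #include "base/logging.h"
    #include "dex_instruction.h"
    #include "utils/dex_instruction_utils.h"

    namespace art {
    inline void MemAccessTypeExampleSketch() {
      // The access type is simply the opcode's distance from the base GET/PUT opcode.
      DCHECK_EQ(SGetMemAccessType(Instruction::SGET_WIDE), kDexMemAccessWide);
      DCHECK_EQ(SPutMemAccessType(Instruction::SPUT_OBJECT), kDexMemAccessObject);
      DCHECK_EQ(IGetMemAccessType(Instruction::IGET_SHORT), kDexMemAccessShort);
    }
    }  // namespace art
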
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index ed73ef0..3bb81bf 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -222,19 +222,13 @@
   StoreValue(rl_dest, rl_result);
 }
 
-/*
- * Mark garbage collection card. Skip if the value we're storing is null.
- */
-void MipsMir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
+void MipsMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) {
   RegStorage reg_card_base = AllocTemp();
   RegStorage reg_card_no = AllocTemp();
-  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
   // NOTE: native pointer.
   LoadWordDisp(rs_rMIPS_SELF, Thread::CardTableOffset<4>().Int32Value(), reg_card_base);
   OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
   StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  branch_over->target = target;
   FreeTemp(reg_card_base);
   FreeTemp(reg_card_no);
 }
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 7e9d80d..e08846c 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -49,7 +49,9 @@
                           OpSize size) OVERRIDE;
     LIR* GenAtomic64Load(RegStorage r_base, int displacement, RegStorage r_dest);
     LIR* GenAtomic64Store(RegStorage r_base, int displacement, RegStorage r_src);
-    void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+
+    /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage)
+    void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE;
 
     // Required for target - register utilities.
     RegStorage Solo64ToPair64(RegStorage reg);
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 13ebc1e..886b238 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -1071,6 +1071,13 @@
     // Update LIR for verbose listings.
     void UpdateLIROffsets();
 
+    /**
+     * @brief Mark a garbage collection card. Skip if the stored value is null.
+     * @param val_reg the register holding the stored value to check against null.
+     * @param tgt_addr_reg the address of the object or array where the value was stored.
+     */
+    void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+
     /*
      * @brief Load the address of the dex method into the register.
      * @param target_method The MethodReference of the method to be invoked.
@@ -1139,7 +1146,12 @@
                                OpSize size, VolatileKind is_volatile) = 0;
     virtual LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                   int scale, OpSize size) = 0;
-    virtual void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) = 0;
+
+    /**
+     * @brief Unconditionally mark a garbage collection card.
+     * @param tgt_addr_reg the address of the object or array where the value was stored.
+     */
+    virtual void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) = 0;
 
     // Required for target - register utilities.
 
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 3933b21..84d68d2 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -424,15 +424,15 @@
   { kX86PextrbRRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0  | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextbRRI", "!0r,!1r,!2d" },
   { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0  | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1, false }, "PextwRRI", "!0r,!1r,!2d" },
   { kX86PextrdRRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0  | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextdRRI", "!0r,!1r,!2d" },
-  { kX86PextrbMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "kX86PextrbMRI", "[!0r+!1d],!2r,!3d" },
-  { kX86PextrwMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "kX86PextrwMRI", "[!0r+!1d],!2r,!3d" },
-  { kX86PextrdMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "kX86PextrdMRI", "[!0r+!1d],!2r,!3d" },
+  { kX86PextrbMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextrbMRI", "[!0r+!1d],!2r,!3d" },
+  { kX86PextrwMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextrwMRI", "[!0r+!1d],!2r,!3d" },
+  { kX86PextrdMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextrdMRI", "[!0r+!1d],!2r,!3d" },
 
   { kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuflwRRI", "!0r,!1r,!2d" },
   { kX86PshufdRRI,  kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuffRRI", "!0r,!1r,!2d" },
 
-  { kX86ShufpsRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x00, 0, 0x0F, 0xC6, 0, 0, 0, 1, false }, "kX86ShufpsRRI", "!0r,!1r,!2d" },
-  { kX86ShufpdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC6, 0, 0, 0, 1, false }, "kX86ShufpdRRI", "!0r,!1r,!2d" },
+  { kX86ShufpsRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE0 | REG_USE1, { 0x00, 0, 0x0F, 0xC6, 0, 0, 0, 1, false }, "ShufpsRRI", "!0r,!1r,!2d" },
+  { kX86ShufpdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE0 | REG_USE1, { 0x66, 0, 0x0F, 0xC6, 0, 0, 0, 1, false }, "ShufpdRRI", "!0r,!1r,!2d" },
 
   { kX86PsrawRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 4, 0, 1, false }, "PsrawRI", "!0r,!1d" },
   { kX86PsradRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 4, 0, 1, false }, "PsradRI", "!0r,!1d" },
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 61dcc28..a808459 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -136,23 +136,16 @@
   StoreValue(rl_dest, rl_result);
 }
 
-/*
- * Mark garbage collection card. Skip if the value we're storing is null.
- */
-void X86Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
+void X86Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) {
   DCHECK_EQ(tgt_addr_reg.Is64Bit(), cu_->target64);
-  DCHECK_EQ(val_reg.Is64Bit(), cu_->target64);
   RegStorage reg_card_base = AllocTempRef();
   RegStorage reg_card_no = AllocTempRef();
-  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
   int ct_offset = cu_->target64 ?
       Thread::CardTableOffset<8>().Int32Value() :
       Thread::CardTableOffset<4>().Int32Value();
   NewLIR2(cu_->target64 ? kX86Mov64RT : kX86Mov32RT, reg_card_base.GetReg(), ct_offset);
   OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
   StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  branch_over->target = target;
   FreeTemp(reg_card_base);
   FreeTemp(reg_card_no);
 }
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index d57dffb..26641f8 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -94,7 +94,10 @@
                      OpSize size, VolatileKind is_volatile) OVERRIDE;
   LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
                         OpSize size) OVERRIDE;
-  void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) OVERRIDE;
+
+  /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage)
+  void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE;
+
   void GenImplicitNullCheck(RegStorage reg, int opt_flags) OVERRIDE;
 
   // Required for target - register utilities.
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index ead31b3..998aeff 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -2263,7 +2263,8 @@
       StoreFinalValue(rl_dest, rl_result);
     } else {
       int displacement = SRegOffset(rl_result.s_reg_low);
-      LIR *l = NewLIR3(extr_opcode, rs_rX86_SP_32.GetReg(), displacement, vector_src.GetReg());
+      LIR *l = NewLIR4(extr_opcode, rs_rX86_SP_32.GetReg(), displacement, vector_src.GetReg(),
+                       extract_index);
       AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is_wide /* is_64bit */);
       AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is_wide /* is_64bit */);
     }
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index d3d76ba..ed33882 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -126,7 +126,7 @@
 
   for (uint32_t idx : bb->data_flow_info->def_v->Indexes()) {
     /* Block bb defines register idx */
-    temp_bit_matrix_[idx]->SetBit(bb->id);
+    temp_.ssa.def_block_matrix[idx]->SetBit(bb->id);
   }
   return true;
 }
@@ -135,16 +135,16 @@
   int num_registers = GetNumOfCodeAndTempVRs();
   /* Allocate num_registers bit vector pointers */
   DCHECK(temp_scoped_alloc_ != nullptr);
-  DCHECK(temp_bit_matrix_ == nullptr);
-  temp_bit_matrix_ = static_cast<ArenaBitVector**>(
+  DCHECK(temp_.ssa.def_block_matrix == nullptr);
+  temp_.ssa.def_block_matrix = static_cast<ArenaBitVector**>(
       temp_scoped_alloc_->Alloc(sizeof(ArenaBitVector*) * num_registers, kArenaAllocDFInfo));
   int i;
 
   /* Initialize num_register vectors with num_blocks bits each */
   for (i = 0; i < num_registers; i++) {
-    temp_bit_matrix_[i] = new (temp_scoped_alloc_.get()) ArenaBitVector(arena_, GetNumBlocks(),
-                                                                        false, kBitMapBMatrix);
-    temp_bit_matrix_[i]->ClearAllBits();
+    temp_.ssa.def_block_matrix[i] = new (temp_scoped_alloc_.get()) ArenaBitVector(
+        arena_, GetNumBlocks(), false, kBitMapBMatrix);
+    temp_.ssa.def_block_matrix[i]->ClearAllBits();
   }
 
   AllNodesIterator iter(this);
@@ -163,7 +163,7 @@
   int num_regs = GetNumOfCodeVRs();
   int in_reg = GetFirstInVR();
   for (; in_reg < num_regs; in_reg++) {
-    temp_bit_matrix_[in_reg]->SetBit(GetEntryBlock()->id);
+    temp_.ssa.def_block_matrix[in_reg]->SetBit(GetEntryBlock()->id);
   }
 }
 
@@ -435,32 +435,32 @@
  * insert a phi node if the variable is live-in to the block.
  */
 bool MIRGraph::ComputeBlockLiveIns(BasicBlock* bb) {
-  DCHECK_EQ(temp_bit_vector_size_, cu_->mir_graph.get()->GetNumOfCodeAndTempVRs());
-  ArenaBitVector* temp_dalvik_register_v = temp_bit_vector_;
+  DCHECK_EQ(temp_.ssa.num_vregs, cu_->mir_graph.get()->GetNumOfCodeAndTempVRs());
+  ArenaBitVector* temp_live_vregs = temp_.ssa.work_live_vregs;
 
   if (bb->data_flow_info == NULL) {
     return false;
   }
-  temp_dalvik_register_v->Copy(bb->data_flow_info->live_in_v);
+  temp_live_vregs->Copy(bb->data_flow_info->live_in_v);
   BasicBlock* bb_taken = GetBasicBlock(bb->taken);
   BasicBlock* bb_fall_through = GetBasicBlock(bb->fall_through);
   if (bb_taken && bb_taken->data_flow_info)
-    ComputeSuccLineIn(temp_dalvik_register_v, bb_taken->data_flow_info->live_in_v,
+    ComputeSuccLineIn(temp_live_vregs, bb_taken->data_flow_info->live_in_v,
                       bb->data_flow_info->def_v);
   if (bb_fall_through && bb_fall_through->data_flow_info)
-    ComputeSuccLineIn(temp_dalvik_register_v, bb_fall_through->data_flow_info->live_in_v,
+    ComputeSuccLineIn(temp_live_vregs, bb_fall_through->data_flow_info->live_in_v,
                       bb->data_flow_info->def_v);
   if (bb->successor_block_list_type != kNotUsed) {
     for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
       BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block);
       if (succ_bb->data_flow_info) {
-        ComputeSuccLineIn(temp_dalvik_register_v, succ_bb->data_flow_info->live_in_v,
+        ComputeSuccLineIn(temp_live_vregs, succ_bb->data_flow_info->live_in_v,
                           bb->data_flow_info->def_v);
       }
     }
   }
-  if (!temp_dalvik_register_v->Equal(bb->data_flow_info->live_in_v)) {
-    bb->data_flow_info->live_in_v->Copy(temp_dalvik_register_v);
+  if (!temp_live_vregs->Equal(bb->data_flow_info->live_in_v)) {
+    bb->data_flow_info->live_in_v->Copy(temp_live_vregs);
     return true;
   }
   return false;
@@ -482,7 +482,7 @@
 
   /* Iterate through each Dalvik register */
   for (dalvik_reg = GetNumOfCodeAndTempVRs() - 1; dalvik_reg >= 0; dalvik_reg--) {
-    input_blocks->Copy(temp_bit_matrix_[dalvik_reg]);
+    input_blocks->Copy(temp_.ssa.def_block_matrix[dalvik_reg]);
     phi_blocks->ClearAllBits();
     do {
       // TUNING: When we repeat this, we could skip indexes from the previous pass.
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 9f0a696..17328c4 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -282,6 +282,10 @@
     Instruction::Code code = inst->Opcode();
     if ((code == Instruction::CHECK_CAST) || (code == Instruction::APUT_OBJECT)) {
       uint32_t dex_pc = inst->GetDexPc(code_item->insns_);
+      if (!method_verifier->GetInstructionFlags(dex_pc).IsVisited()) {
+        // Do not attempt to quicken this instruction; it's unreachable anyway.
+        continue;
+      }
       const verifier::RegisterLine* line = method_verifier->GetRegLine(dex_pc);
       bool is_safe_cast = false;
       if (code == Instruction::CHECK_CAST) {
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 08041e8..2e9f835 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1986,6 +1986,7 @@
     case kArm:
     case kArm64:
     case kThumb2:
+    case kMips:
     case kX86:
     case kX86_64: return true;
     default: return false;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index be8631a..b261460 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -305,6 +305,15 @@
 }
 
 template<typename T>
+void HGraphBuilder::Binop_23x_shift(const Instruction& instruction,
+                                    Primitive::Type type) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), Primitive::kPrimInt);
+  current_block_->AddInstruction(new (arena_) T(type, first, second));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
 void HGraphBuilder::Binop_12x(const Instruction& instruction, Primitive::Type type) {
   HInstruction* first = LoadLocal(instruction.VRegA(), type);
   HInstruction* second = LoadLocal(instruction.VRegB(), type);
@@ -313,6 +322,14 @@
 }
 
 template<typename T>
+void HGraphBuilder::Binop_12x_shift(const Instruction& instruction, Primitive::Type type) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  current_block_->AddInstruction(new (arena_) T(type, first, second));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
 void HGraphBuilder::Binop_12x(const Instruction& instruction,
                               Primitive::Type type,
                               uint32_t dex_pc) {
@@ -1141,6 +1158,36 @@
       break;
     }
 
+    case Instruction::SHL_INT: {
+      Binop_23x_shift<HShl>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::SHL_LONG: {
+      Binop_23x_shift<HShl>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
+    case Instruction::SHR_INT: {
+      Binop_23x_shift<HShr>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::SHR_LONG: {
+      Binop_23x_shift<HShr>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
+    case Instruction::USHR_INT: {
+      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::USHR_LONG: {
+      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
     case Instruction::OR_INT: {
       Binop_23x<HOr>(instruction, Primitive::kPrimInt);
       break;
@@ -1240,6 +1287,36 @@
       break;
     }
 
+    case Instruction::SHL_INT_2ADDR: {
+      Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::SHL_LONG_2ADDR: {
+      Binop_12x_shift<HShl>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
+    case Instruction::SHR_INT_2ADDR: {
+      Binop_12x_shift<HShr>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::SHR_LONG_2ADDR: {
+      Binop_12x_shift<HShr>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
+    case Instruction::USHR_INT_2ADDR: {
+      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::USHR_LONG_2ADDR: {
+      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
     case Instruction::DIV_FLOAT_2ADDR: {
       Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
       break;
@@ -1354,6 +1431,21 @@
       break;
     }
 
+    case Instruction::SHL_INT_LIT8: {
+      Binop_22b<HShl>(instruction, false);
+      break;
+    }
+
+    case Instruction::SHR_INT_LIT8: {
+      Binop_22b<HShr>(instruction, false);
+      break;
+    }
+
+    case Instruction::USHR_INT_LIT8: {
+      Binop_22b<HUShr>(instruction, false);
+      break;
+    }
+
     case Instruction::NEW_INSTANCE: {
       current_block_->AddInstruction(
           new (arena_) HNewInstance(dex_pc, instruction.VRegB_21c()));
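
The dedicated Binop_23x_shift/Binop_12x_shift helpers exist because dex shift instructions always take a 32-bit shift count, even when the shifted value is a long, so the second operand must be loaded as kPrimInt. Only the low bits of that count matter, as in this minimal sketch (not part of the patch; it mirrors kMaxLongShiftValue/kMaxIntShiftValue used by HShl/HShr/HUShr below):

    #include <cstdint>

    // A long shift uses only the low 6 bits of the int count (an int shift uses the low 5 bits).
    static inline int64_t ShlLongSemanticsSketch(int64_t value, int32_t count) {
      return value << (count & 0x3f);
    }
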
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 897bcec..204005d 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -105,12 +105,18 @@
   void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
 
   template<typename T>
+  void Binop_23x_shift(const Instruction& instruction, Primitive::Type type);
+
+  template<typename T>
   void Binop_12x(const Instruction& instruction, Primitive::Type type);
 
   template<typename T>
   void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
 
   template<typename T>
+  void Binop_12x_shift(const Instruction& instruction, Primitive::Type type);
+
+  template<typename T>
   void Binop_22b(const Instruction& instruction, bool reverse);
 
   template<typename T>
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1701ef5..a204e21 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -2087,6 +2087,124 @@
   }
 }
 
+void LocationsBuilderARM::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary::CallKind call_kind = op->GetResultType() == Primitive::kPrimLong
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(op, call_kind);
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(op->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+      // The runtime helper puts the output in R0,R2.
+      locations->SetOut(Location::RegisterPairLocation(R0, R2));
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations = op->GetLocations();
+  Location out = locations->Out();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  Primitive::Type type = op->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      Register out_reg = out.As<Register>();
+      Register first_reg = first.As<Register>();
+      // ARM does not mask the shift count, so we need to do it ourselves.
+      if (second.IsRegister()) {
+        Register second_reg = second.As<Register>();
+        __ and_(second_reg, second_reg, ShifterOperand(kMaxIntShiftValue));
+        if (op->IsShl()) {
+          __ Lsl(out_reg, first_reg, second_reg);
+        } else if (op->IsShr()) {
+          __ Asr(out_reg, first_reg, second_reg);
+        } else {
+          __ Lsr(out_reg, first_reg, second_reg);
+        }
+      } else {
+        int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t shift_value = static_cast<uint32_t>(cst & kMaxIntShiftValue);
+        if (shift_value == 0) {  // ARM does not support a shift immediate of 0.
+          __ Mov(out_reg, first_reg);
+        } else if (op->IsShl()) {
+          __ Lsl(out_reg, first_reg, shift_value);
+        } else if (op->IsShr()) {
+          __ Asr(out_reg, first_reg, shift_value);
+        } else {
+          __ Lsr(out_reg, first_reg, shift_value);
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      // TODO: Inline the assembly instead of calling the runtime.
+      InvokeRuntimeCallingConvention calling_convention;
+      DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.As<Register>());
+      DCHECK_EQ(R0, out.AsRegisterPairLow<Register>());
+      DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>());
+
+      int32_t entry_point_offset;
+      if (op->IsShl()) {
+        entry_point_offset = QUICK_ENTRY_POINT(pShlLong);
+      } else if (op->IsShr()) {
+        entry_point_offset = QUICK_ENTRY_POINT(pShrLong);
+      } else {
+        entry_point_offset = QUICK_ENTRY_POINT(pUshrLong);
+      }
+      __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset);
+      __ blx(LR);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << type;
+  }
+}
+
+void LocationsBuilderARM::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void InstructionCodeGeneratorARM::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void LocationsBuilderARM::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void InstructionCodeGeneratorARM::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void LocationsBuilderARM::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorARM::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
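
In the integer case above, the register count is masked with kMaxIntShiftValue because ARM register-specified shifts consume the whole low byte of the count register, whereas Java semantics use only the low five bits; a constant count of 0 degenerates to a plain move. A minimal sketch of the resulting semantics (not part of the patch; an arithmetic right shift of negative values is assumed of the host compiler):

    #include <cstdint>

    static inline int32_t ArmIntAsrSemanticsSketch(int32_t value, int32_t count) {
      count &= 0x1f;           // __ and_(second_reg, second_reg, ShifterOperand(kMaxIntShiftValue));
      return value >> count;   // __ Asr(out_reg, first_reg, second_reg);
    }
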
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index c00fac1..226e635 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -109,6 +109,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleShift(HBinaryOperation* operation);
 
   CodeGeneratorARM* const codegen_;
   InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -136,6 +137,7 @@
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCodeARM* slow_path, Register class_reg);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleShift(HBinaryOperation* operation);
 
   ArmAssembler* const assembler_;
   CodeGeneratorARM* const codegen_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 82dced5..7a8b941 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -801,7 +801,10 @@
 
 #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)              \
   M(ParallelMove)                                          \
-  M(Rem)
+  M(Rem)                                                   \
+  M(Shl)                                                   \
+  M(Shr)                                                   \
+  M(UShr)                                                  \
 
 #define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 3c53cea..917b7dd 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2129,6 +2129,139 @@
   }
 }
 
+void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL.
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL.
+      locations->SetInAt(1, Location::RegisterLocation(ECX));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected op type " << op->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations = op->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  DCHECK(first.Equals(locations->Out()));
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt: {
+      Register first_reg = first.As<Register>();
+      if (second.IsRegister()) {
+        Register second_reg = second.As<Register>();
+        DCHECK_EQ(ECX, second_reg);
+        if (op->IsShl()) {
+          __ shll(first_reg, second_reg);
+        } else if (op->IsShr()) {
+          __ sarl(first_reg, second_reg);
+        } else {
+          __ shrl(first_reg, second_reg);
+        }
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
+        if (op->IsShl()) {
+          __ shll(first_reg, imm);
+        } else if (op->IsShr()) {
+          __ sarl(first_reg, imm);
+        } else {
+          __ shrl(first_reg, imm);
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      Register second_reg = second.As<Register>();
+      DCHECK_EQ(ECX, second_reg);
+      if (op->IsShl()) {
+        GenerateShlLong(first, second_reg);
+      } else if (op->IsShr()) {
+        GenerateShrLong(first, second_reg);
+      } else {
+        GenerateUShrLong(first, second_reg);
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected op type " << op->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
+  Label done;
+  __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
+  __ shll(loc.AsRegisterPairLow<Register>(), shifter);
+  __ testl(shifter, Immediate(32));
+  __ j(kEqual, &done);
+  __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
+  __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
+  __ Bind(&done);
+}
+
+void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
+  Label done;
+  __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
+  __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
+  __ testl(shifter, Immediate(32));
+  __ j(kEqual, &done);
+  __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
+  __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
+  __ Bind(&done);
+}
+
+void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
+  Label done;
+  __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
+  __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
+  __ testl(shifter, Immediate(32));
+  __ j(kEqual, &done);
+  __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
+  __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
+  __ Bind(&done);
+}
+
+void LocationsBuilderX86::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void LocationsBuilderX86::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
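
GenerateShlLong/GenerateShrLong/GenerateUShrLong rely on the hardware masking the shld/shll/shrd/sarl/shrl count mod 32 and then fix up counts that have bit 5 set. A C++ sketch of the semantics the left-shift sequence implements (not part of the patch; the guard on n only avoids undefined behaviour in C++, since the real instructions are simply no-ops for a zero count):

    #include <cstdint>

    static inline uint64_t X86ShlLongSemanticsSketch(uint64_t value, uint32_t count) {
      count &= 63;                              // Java semantics: only the low 6 bits of the count matter.
      uint32_t lo = static_cast<uint32_t>(value);
      uint32_t hi = static_cast<uint32_t>(value >> 32);
      uint32_t n = count & 31;                  // the hardware masks the shld/shll count mod 32
      if (n != 0) {
        hi = (hi << n) | (lo >> (32 - n));      // shld hi, lo, cl
        lo <<= n;                               // shll lo, cl
      }
      if ((count & 32) != 0) {                  // testl cl, 32; the branch is taken when the bit is clear
        hi = lo;                                // movl hi, lo
        lo = 0;                                 // movl lo, 0
      }
      return (static_cast<uint64_t>(hi) << 32) | lo;
    }
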
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 0aff6cc..aed06c0 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -104,6 +104,7 @@
  private:
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void HandleInvoke(HInvoke* invoke);
+  void HandleShift(HBinaryOperation* instruction);
 
   CodeGeneratorX86* const codegen_;
   InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -132,6 +133,10 @@
   void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg);
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void HandleShift(HBinaryOperation* instruction);
+  void GenerateShlLong(const Location& loc, Register shifter);
+  void GenerateShrLong(const Location& loc, Register shifter);
+  void GenerateUShrLong(const Location& loc, Register shifter);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 97f5e5c..69f031a 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2026,6 +2026,107 @@
   }
 }
 
+void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL.
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations = op->GetLocations();
+  CpuRegister first_reg = locations->InAt(0).As<CpuRegister>();
+  Location second = locations->InAt(1);
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt: {
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.As<CpuRegister>();
+        if (op->IsShl()) {
+          __ shll(first_reg, second_reg);
+        } else if (op->IsShr()) {
+          __ sarl(first_reg, second_reg);
+        } else {
+          __ shrl(first_reg, second_reg);
+        }
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
+        if (op->IsShl()) {
+          __ shll(first_reg, imm);
+        } else if (op->IsShr()) {
+          __ sarl(first_reg, imm);
+        } else {
+          __ shrl(first_reg, imm);
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.As<CpuRegister>();
+        if (op->IsShl()) {
+          __ shlq(first_reg, second_reg);
+        } else if (op->IsShr()) {
+          __ sarq(first_reg, second_reg);
+        } else {
+          __ shrq(first_reg, second_reg);
+        }
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
+        if (op->IsShl()) {
+          __ shlq(first_reg, imm);
+        } else if (op->IsShr()) {
+          __ sarq(first_reg, imm);
+        } else {
+          __ shrq(first_reg, imm);
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+  }
+}
+
+void LocationsBuilderX86_64::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void LocationsBuilderX86_64::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 29c679d..794b81f 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -108,6 +108,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleShift(HBinaryOperation* operation);
 
   CodeGeneratorX86_64* const codegen_;
   InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -136,6 +137,7 @@
   void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg);
   void HandleBitwiseOperation(HBinaryOperation* operation);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void HandleShift(HBinaryOperation* operation);
 
   X86_64Assembler* const assembler_;
   CodeGeneratorX86_64* const codegen_;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 19cd120..f562113 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -42,6 +42,9 @@
 static const int kDefaultNumberOfDominatedBlocks = 1;
 static const int kDefaultNumberOfBackEdges = 1;
 
+static constexpr uint32_t kMaxIntShiftValue = 0x1f;
+static constexpr uint64_t kMaxLongShiftValue = 0x3f;
+
 enum IfCondition {
   kCondEQ,
   kCondNE,
@@ -521,9 +524,11 @@
   M(ParallelMove, Instruction)                                          \
   M(ParameterValue, Instruction)                                        \
   M(Phi, Instruction)                                                   \
-  M(Rem, BinaryOperation)                                             \
+  M(Rem, BinaryOperation)                                               \
   M(Return, Instruction)                                                \
   M(ReturnVoid, Instruction)                                            \
+  M(Shl, BinaryOperation)                                               \
+  M(Shr, BinaryOperation)                                               \
   M(StaticFieldGet, Instruction)                                        \
   M(StaticFieldSet, Instruction)                                        \
   M(StoreLocal, Instruction)                                            \
@@ -532,6 +537,7 @@
   M(Temporary, Instruction)                                             \
   M(Throw, Instruction)                                                 \
   M(TypeConversion, Instruction)                                        \
+  M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
 
 #define FOR_EACH_INSTRUCTION(M)                                         \
@@ -1831,6 +1837,57 @@
   DISALLOW_COPY_AND_ASSIGN(HDivZeroCheck);
 };
 
+class HShl : public HBinaryOperation {
+ public:
+  HShl(Primitive::Type result_type, HInstruction* left, HInstruction* right)
+      : HBinaryOperation(result_type, left, right) {}
+
+  int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x << (y & kMaxIntShiftValue); }
+  int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x << (y & kMaxLongShiftValue); }
+
+  DECLARE_INSTRUCTION(Shl);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HShl);
+};
+
+class HShr : public HBinaryOperation {
+ public:
+  HShr(Primitive::Type result_type, HInstruction* left, HInstruction* right)
+      : HBinaryOperation(result_type, left, right) {}
+
+  int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x >> (y & kMaxIntShiftValue); }
+  int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x >> (y & kMaxLongShiftValue); }
+
+  DECLARE_INSTRUCTION(Shr);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HShr);
+};
+
+class HUShr : public HBinaryOperation {
+ public:
+  HUShr(Primitive::Type result_type, HInstruction* left, HInstruction* right)
+      : HBinaryOperation(result_type, left, right) {}
+
+  int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE {
+    uint32_t ux = static_cast<uint32_t>(x);
+    uint32_t uy = static_cast<uint32_t>(y) & kMaxIntShiftValue;
+    return static_cast<int32_t>(ux >> uy);
+  }
+
+  int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE {
+    uint64_t ux = static_cast<uint64_t>(x);
+    uint64_t uy = static_cast<uint64_t>(y) & kMaxLongShiftValue;
+    return static_cast<int64_t>(ux >> uy);
+  }
+
+  DECLARE_INSTRUCTION(UShr);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HUShr);
+};
+
 class HAnd : public HBinaryOperation {
  public:
   HAnd(Primitive::Type result_type, HInstruction* left, HInstruction* right)
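
The masks in the Evaluate overloads matter for constant folding: shifting by the full bit width (or more) is undefined behaviour in C++, while dex/Java semantics simply reduce the count, so an unsigned right shift of -1 by 28 folds to 15. A minimal sketch mirroring HUShr::Evaluate for ints (not part of the patch):

    #include <cstdint>

    static inline int32_t UShrEvaluateSketch(int32_t x, int32_t y) {
      uint32_t ux = static_cast<uint32_t>(x);
      uint32_t uy = static_cast<uint32_t>(y) & 0x1f;  // kMaxIntShiftValue
      return static_cast<int32_t>(ux >> uy);          // UShrEvaluateSketch(-1, 28) == 15
    }
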
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index a1594b0..a541763 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -1079,7 +1079,7 @@
 
 void Arm32Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
                          bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
+  CHECK_LE(shift_imm, 31u);
   if (setcc) {
     movs(rd, ShifterOperand(rm, LSL, shift_imm), cond);
   } else {
@@ -1090,7 +1090,7 @@
 
 void Arm32Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
                          bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
+  CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
   if (setcc) {
     movs(rd, ShifterOperand(rm, LSR, shift_imm), cond);
@@ -1102,7 +1102,7 @@
 
 void Arm32Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
                          bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
+  CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
   if (setcc) {
     movs(rd, ShifterOperand(rm, ASR, shift_imm), cond);
@@ -1114,7 +1114,7 @@
 
 void Arm32Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
                          bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
+  CHECK(1u <= shift_imm && shift_imm <= 31u);
   if (setcc) {
     movs(rd, ShifterOperand(rm, ROR, shift_imm), cond);
   } else {
diff --git a/compiler/utils/arm/assembler_arm_test.h b/compiler/utils/arm/assembler_arm_test.h
index 34475b2..838abb6 100644
--- a/compiler/utils/arm/assembler_arm_test.h
+++ b/compiler/utils/arm/assembler_arm_test.h
@@ -159,7 +159,7 @@
   std::string RepeatRRiiC(void (Ass::*f)(Reg, Reg, Imm, Imm, Cond),
                           std::vector<std::pair<Imm, Imm>>& immediates,
                           std::string fmt) {
-    return RepeatTemplatedRRiiC(f, GetRegisters(), GetRegisters(),
+    return RepeatTemplatedRRiiC<Reg, Reg>(f, GetRegisters(), GetRegisters(),
         &AssemblerArmTest::template GetRegName<RegisterView::kUsePrimaryName>,
         &AssemblerArmTest::template GetRegName<RegisterView::kUsePrimaryName>,
         immediates, fmt);
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index a349209..a377cb2 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2210,7 +2210,7 @@
 
 void Thumb2Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
                           bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
+  CHECK_LE(shift_imm, 31u);
   CheckCondition(cond);
   EmitShift(rd, rm, LSL, shift_imm, setcc);
 }
@@ -2218,7 +2218,7 @@
 
 void Thumb2Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
                           bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
+  CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
   CheckCondition(cond);
   EmitShift(rd, rm, LSR, shift_imm, setcc);
@@ -2227,7 +2227,7 @@
 
 void Thumb2Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
                           bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
+  CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
   CheckCondition(cond);
   EmitShift(rd, rm, ASR, shift_imm, setcc);
@@ -2236,7 +2236,7 @@
 
 void Thumb2Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
                           bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
+  CHECK(1u <= shift_imm && shift_imm <= 31u);
   CheckCondition(cond);
   EmitShift(rd, rm, ROR, shift_imm, setcc);
 }
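
The relaxed CHECKs above encode the UAL immediate ranges instead of rejecting a zero count outright. A compact restatement of those ranges (a sketch, not part of the patch):

    #include <cstdint>

    constexpr bool IsEncodableLslImm(uint32_t imm) { return imm <= 31u; }                  // LSL #0 is a plain move.
    constexpr bool IsEncodableLsrAsrImm(uint32_t imm) { return 1u <= imm && imm <= 32u; }  // 32 is encoded as 0 (UAL).
    constexpr bool IsEncodableRorImm(uint32_t imm) { return 1u <= imm && imm <= 31u; }     // ROR #0 would mean RRX.
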
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 1fadb91..2b55120 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -226,6 +226,10 @@
     UNREACHABLE();
   }
 
+  std::string GetRegisterName(const Reg& reg) {
+    return GetRegName<RegisterView::kUsePrimaryName>(reg);
+  }
+
  protected:
   explicit AssemblerTest() {}
 
diff --git a/compiler/utils/dex_instruction_utils.h b/compiler/utils/dex_instruction_utils.h
new file mode 100644
index 0000000..ad7d750
--- /dev/null
+++ b/compiler/utils/dex_instruction_utils.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_DEX_INSTRUCTION_UTILS_H_
+#define ART_COMPILER_UTILS_DEX_INSTRUCTION_UTILS_H_
+
+#include "dex_instruction.h"
+
+namespace art {
+
+// Dex invoke type corresponds to the ordering of INVOKE instructions;
+// this order is the same for range and non-range invokes.
+enum DexInvokeType : uint8_t {
+  kDexInvokeVirtual = 0,  // invoke-virtual, invoke-virtual-range
+  kDexInvokeSuper,        // invoke-super, invoke-super-range
+  kDexInvokeDirect,       // invoke-direct, invoke-direct-range
+  kDexInvokeStatic,       // invoke-static, invoke-static-range
+  kDexInvokeInterface,    // invoke-interface, invoke-interface-range
+  kDexInvokeTypeCount
+};
+
+// Dex instruction memory access types correspond to the ordering of GET/PUT instructions;
+// this order is the same for IGET, IPUT, SGET, SPUT, AGET and APUT.
+enum DexMemAccessType : uint8_t {
+  kDexMemAccessWord = 0,  // op         0; int or float, the actual type is not encoded.
+  kDexMemAccessWide,      // op_WIDE    1; long or double, the actual type is not encoded.
+  kDexMemAccessObject,    // op_OBJECT  2; the actual reference type is not encoded.
+  kDexMemAccessBoolean,   // op_BOOLEAN 3
+  kDexMemAccessByte,      // op_BYTE    4
+  kDexMemAccessChar,      // op_CHAR    5
+  kDexMemAccessShort,     // op_SHORT   6
+  kDexMemAccessTypeCount
+};
+
+std::ostream& operator<<(std::ostream& os, const DexMemAccessType& type);
+
+// NOTE: The following functions disregard quickened instructions.
+
+constexpr bool IsInstructionInvoke(Instruction::Code opcode) {
+  return Instruction::INVOKE_VIRTUAL <= opcode && opcode <= Instruction::INVOKE_INTERFACE_RANGE &&
+      opcode != Instruction::RETURN_VOID_BARRIER;
+}
+
+constexpr bool IsInstructionInvokeStatic(Instruction::Code opcode) {
+  return opcode == Instruction::INVOKE_STATIC || opcode == Instruction::INVOKE_STATIC_RANGE;
+}
+
+constexpr bool IsInstructionGoto(Instruction::Code opcode) {
+  return Instruction::GOTO <= opcode && opcode <= Instruction::GOTO_32;
+}
+
+constexpr bool IsInstructionIfCc(Instruction::Code opcode) {
+  return Instruction::IF_EQ <= opcode && opcode <= Instruction::IF_LE;
+}
+
+constexpr bool IsInstructionIfCcZ(Instruction::Code opcode) {
+  return Instruction::IF_EQZ <= opcode && opcode <= Instruction::IF_LEZ;
+}
+
+constexpr bool IsInstructionIGet(Instruction::Code code) {
+  return Instruction::IGET <= code && code <= Instruction::IGET_SHORT;
+}
+
+constexpr bool IsInstructionIPut(Instruction::Code code) {
+  return Instruction::IPUT <= code && code <= Instruction::IPUT_SHORT;
+}
+
+constexpr bool IsInstructionSGet(Instruction::Code code) {
+  return Instruction::SGET <= code && code <= Instruction::SGET_SHORT;
+}
+
+constexpr bool IsInstructionSPut(Instruction::Code code) {
+  return Instruction::SPUT <= code && code <= Instruction::SPUT_SHORT;
+}
+
+constexpr bool IsInstructionAGet(Instruction::Code code) {
+  return Instruction::AGET <= code && code <= Instruction::AGET_SHORT;
+}
+
+constexpr bool IsInstructionAPut(Instruction::Code code) {
+  return Instruction::APUT <= code && code <= Instruction::APUT_SHORT;
+}
+
+constexpr bool IsInstructionIGetOrIPut(Instruction::Code code) {
+  return Instruction::IGET <= code && code <= Instruction::IPUT_SHORT;
+}
+
+constexpr bool IsInstructionSGetOrSPut(Instruction::Code code) {
+  return Instruction::SGET <= code && code <= Instruction::SPUT_SHORT;
+}
+
+constexpr bool IsInstructionAGetOrAPut(Instruction::Code code) {
+  return Instruction::AGET <= code && code <= Instruction::APUT_SHORT;
+}
+
+// TODO: Remove the #if guards below when we fully migrate to C++14.
+
+constexpr bool IsInvokeInstructionRange(Instruction::Code opcode) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionInvoke(opcode));
+#endif
+  return opcode >= Instruction::INVOKE_VIRTUAL_RANGE;
+}
+
+constexpr DexInvokeType InvokeInstructionType(Instruction::Code opcode) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionInvoke(opcode));
+#endif
+  return static_cast<DexInvokeType>(IsInvokeInstructionRange(opcode)
+                                    ? (opcode - Instruction::INVOKE_VIRTUAL_RANGE)
+                                    : (opcode - Instruction::INVOKE_VIRTUAL));
+}
+
+constexpr DexMemAccessType IGetMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionIGet(code));
+#endif
+  return static_cast<DexMemAccessType>(code - Instruction::IGET);
+}
+
+constexpr DexMemAccessType IPutMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionIPut(code));
+#endif
+  return static_cast<DexMemAccessType>(code - Instruction::IPUT);
+}
+
+constexpr DexMemAccessType SGetMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionSGet(code));
+#endif
+  return static_cast<DexMemAccessType>(code - Instruction::SGET);
+}
+
+constexpr DexMemAccessType SPutMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionSPut(code));
+#endif
+  return static_cast<DexMemAccessType>(code - Instruction::SPUT);
+}
+
+constexpr DexMemAccessType AGetMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionAGet(code));
+#endif
+  return static_cast<DexMemAccessType>(code - Instruction::AGET);
+}
+
+constexpr DexMemAccessType APutMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionAPut(code));
+#endif
+  return static_cast<DexMemAccessType>(code - Instruction::APUT);
+}
+
+constexpr DexMemAccessType IGetOrIPutMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionIGetOrIPut(code));
+#endif
+  return (code >= Instruction::IPUT) ? IPutMemAccessType(code) : IGetMemAccessType(code);
+}
+
+constexpr DexMemAccessType SGetOrSPutMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionSGetOrSPut(code));
+#endif
+  return (code >= Instruction::SPUT) ? SPutMemAccessType(code) : SGetMemAccessType(code);
+}
+
+constexpr DexMemAccessType AGetOrAPutMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionAGetOrAPut(code));
+#endif
+  return (code >= Instruction::APUT) ? APutMemAccessType(code) : AGetMemAccessType(code);
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_DEX_INSTRUCTION_UTILS_H_
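
Because the helpers above are constexpr (with the DCHECKs compiled out before C++14), their mappings can be spot-checked at compile time. A few illustrative assertions (a sketch, not part of the patch; the include paths are assumptions):

    #include "dex_instruction.h"
    #include "utils/dex_instruction_utils.h"

    namespace art {
    static_assert(IsInvokeInstructionRange(Instruction::INVOKE_STATIC_RANGE),
                  "range invokes start at INVOKE_VIRTUAL_RANGE");
    static_assert(InvokeInstructionType(Instruction::INVOKE_STATIC_RANGE) == kDexInvokeStatic,
                  "range and non-range invokes share one DexInvokeType");
    static_assert(IGetOrIPutMemAccessType(Instruction::IPUT_BOOLEAN) == kDexMemAccessBoolean,
                  "the access type is the offset from the base IGET/IPUT opcode");
    }  // namespace art
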
diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc
index 2616150..d9e0619 100644
--- a/compiler/utils/scoped_arena_allocator.cc
+++ b/compiler/utils/scoped_arena_allocator.cc
@@ -96,6 +96,7 @@
   uint8_t* ptr = top_ptr_;
   if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
     ptr = AllocateFromNextArena(rounded_bytes);
+    CHECK(ptr != nullptr) << "Failed to allocate memory";
   }
   CurrentStats()->RecordAlloc(bytes, kind);
   top_ptr_ = ptr + rounded_bytes;
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index afa4a3b..a297ea3 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1126,7 +1126,8 @@
 }
 
 
-void X86Assembler::shld(Register dst, Register src) {
+void X86Assembler::shld(Register dst, Register src, Register shifter) {
+  DCHECK_EQ(ECX, shifter);
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
   EmitUint8(0xA5);
@@ -1134,6 +1135,15 @@
 }
 
 
+void X86Assembler::shrd(Register dst, Register src, Register shifter) {
+  DCHECK_EQ(ECX, shifter);
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xAD);
+  EmitRegisterOperand(src, dst);
+}
+
+
 void X86Assembler::negl(Register reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF7);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 8aed934..6ea66a5 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -405,7 +405,8 @@
   void shrl(Register operand, Register shifter);
   void sarl(Register reg, const Immediate& imm);
   void sarl(Register operand, Register shifter);
-  void shld(Register dst, Register src);
+  void shld(Register dst, Register src, Register shifter);
+  void shrd(Register dst, Register src, Register shifter);
 
   void negl(Register reg);
   void notl(Register reg);
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 8c428f4..dff3849 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1451,8 +1451,18 @@
 }
 
 
+void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
+  EmitGenericShift(true, 4, reg, imm);
+}
+
+
 void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
-  EmitGenericShift(4, operand, shifter);
+  EmitGenericShift(false, 4, operand, shifter);
+}
+
+
+void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
+  EmitGenericShift(true, 4, operand, shifter);
 }
 
 
@@ -1467,7 +1477,12 @@
 
 
 void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
-  EmitGenericShift(5, operand, shifter);
+  EmitGenericShift(false, 5, operand, shifter);
+}
+
+
+void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
+  EmitGenericShift(true, 5, operand, shifter);
 }
 
 
@@ -1477,7 +1492,17 @@
 
 
 void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
-  EmitGenericShift(7, operand, shifter);
+  EmitGenericShift(false, 7, operand, shifter);
+}
+
+
+void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
+  EmitGenericShift(true, 7, reg, imm);
+}
+
+
+void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
+  EmitGenericShift(true, 7, operand, shifter);
 }
 
 
@@ -1826,12 +1851,17 @@
 }
 
 
-void X86_64Assembler::EmitGenericShift(int reg_or_opcode,
+void X86_64Assembler::EmitGenericShift(bool wide,
+                                       int reg_or_opcode,
                                        CpuRegister operand,
                                        CpuRegister shifter) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK_EQ(shifter.AsRegister(), RCX);
-  EmitOptionalRex32(operand);
+  if (wide) {
+    EmitRex64(operand);
+  } else {
+    EmitOptionalRex32(operand);
+  }
   EmitUint8(0xD3);
   EmitOperand(reg_or_opcode, Operand(operand));
 }
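
With the wide flag set, EmitGenericShift prefixes the group-2 shift (opcode 0xD3; /4 = shl, /5 = shr, /7 = sar; count in CL) with REX.W so it operates on the full 64-bit register. The encoder below is a hedged standalone sketch of the register-direct form for illustration only; it is not ART's AssemblerBuffer API.

    // Byte sequence for shl/shr/sar-by-CL on a 64-bit register:
    // REX.W (0x48, plus the B bit for r8-r15), 0xD3, then a ModRM byte whose
    // reg field carries the /4, /5 or /7 opcode extension.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    std::vector<uint8_t> EncodeShiftByCl(int reg_or_opcode, int reg, bool wide) {
      std::vector<uint8_t> out;
      uint8_t rex = (wide ? 0x48 : 0x40) | ((reg >= 8) ? 0x01 : 0x00);  // W and B bits
      if (rex != 0x40) {  // 32-bit forms on the low registers need no REX at all
        out.push_back(rex);
      }
      out.push_back(0xD3);                                     // group-2 shift by %cl
      out.push_back(0xC0 | (reg_or_opcode << 3) | (reg & 7));  // ModRM, register direct
      return out;
    }

    int main() {
      // shlq %cl, %rax  ->  48 d3 e0
      for (uint8_t b : EncodeShiftByCl(/*shl*/ 4, /*rax*/ 0, /*wide*/ true)) {
        std::printf("%02x ", static_cast<unsigned>(b));
      }
      std::printf("\n");
      return 0;
    }
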
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 4dd70e2..ab1bc9e 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -460,7 +460,12 @@
   void sarl(CpuRegister reg, const Immediate& imm);
   void sarl(CpuRegister operand, CpuRegister shifter);
 
+  void shlq(CpuRegister reg, const Immediate& imm);
+  void shlq(CpuRegister operand, CpuRegister shifter);
   void shrq(CpuRegister reg, const Immediate& imm);
+  void shrq(CpuRegister operand, CpuRegister shifter);
+  void sarq(CpuRegister reg, const Immediate& imm);
+  void sarq(CpuRegister operand, CpuRegister shifter);
 
   void negl(CpuRegister reg);
   void negq(CpuRegister reg);
@@ -657,7 +662,7 @@
   void EmitNearLabelLink(Label* label);
 
   void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
-  void EmitGenericShift(int rm, CpuRegister operand, CpuRegister shifter);
+  void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);
 
   // If any input is not false, output the necessary rex prefix.
   void EmitOptionalRex(bool force, bool w, bool r, bool x, bool b);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index af389e6..14a98b9 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -296,7 +296,7 @@
   DriverStr(Repeatri(&x86_64::X86_64Assembler::subl, 4U, "sub ${imm}, %{reg}"), "subli");
 }
 
-// Shll only allows CL as the shift register.
+// Shll only allows CL as the shift count.
 std::string shll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
   std::ostringstream str;
 
@@ -319,7 +319,31 @@
   DriverStr(Repeatri(&x86_64::X86_64Assembler::shll, 1U, "shll ${imm}, %{reg}"), "shlli");
 }
 
-// Shrl only allows CL as the shift register.
+// Shlq only allows CL as the shift count.
+std::string shlq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->shlq(*reg, shifter);
+    str << "shlq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+  printf("%s\n", str.str().c_str());
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, ShlqReg) {
+  DriverFn(&shlq_fn, "shlq");
+}
+
+TEST_F(AssemblerX86_64Test, ShlqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::shlq, 1U, "shlq ${imm}, %{reg}"), "shlqi");
+}
+
+// Shrl only allows CL as the shift count.
 std::string shrl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
   std::ostringstream str;
 
@@ -342,7 +366,30 @@
   DriverStr(Repeatri(&x86_64::X86_64Assembler::shrl, 1U, "shrl ${imm}, %{reg}"), "shrli");
 }
 
-// Sarl only allows CL as the shift register.
+// Shrq only allows CL as the shift count.
+std::string shrq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->shrq(*reg, shifter);
+    str << "shrq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, ShrqReg) {
+  DriverFn(&shrq_fn, "shrq");
+}
+
+TEST_F(AssemblerX86_64Test, ShrqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::shrq, 1U, "shrq ${imm}, %{reg}"), "shrqi");
+}
+
+// Sarl only allows CL as the shift count.
 std::string sarl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
   std::ostringstream str;
 
@@ -365,6 +412,29 @@
   DriverStr(Repeatri(&x86_64::X86_64Assembler::sarl, 1U, "sarl ${imm}, %{reg}"), "sarli");
 }
 
+// Sarq only allows CL as the shift count.
+std::string sarq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->sarq(*reg, shifter);
+    str << "sarq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, SarqReg) {
+  DriverFn(&sarq_fn, "sarq");
+}
+
+TEST_F(AssemblerX86_64Test, SarqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::sarq, 1U, "sarq ${imm}, %{reg}"), "sarqi");
+}
+
 TEST_F(AssemblerX86_64Test, CmpqRegs) {
   DriverStr(RepeatRR(&x86_64::X86_64Assembler::cmpq, "cmpq %{reg2}, %{reg1}"), "cmpq");
 }
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index feee598..08352de 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -386,7 +386,8 @@
     : oat_file_(oat_file),
       oat_dex_files_(oat_file.GetOatDexFiles()),
       options_(options),
-      disassembler_(Disassembler::Create(oat_file_.GetOatHeader().GetInstructionSet(),
+      instruction_set_(oat_file_.GetOatHeader().GetInstructionSet()),
+      disassembler_(Disassembler::Create(instruction_set_,
                                          new DisassemblerOptions(options_->absolute_addresses_,
                                                                  oat_file.Begin(),
                                                                  true /* can_read_litals_ */))) {
@@ -399,6 +400,10 @@
     delete disassembler_;
   }
 
+  InstructionSet GetInstructionSet() {
+    return instruction_set_;
+  }
+
   bool Dump(std::ostream& os) {
     bool success = true;
     const OatHeader& oat_header = oat_file_.GetOatHeader();
@@ -515,7 +520,7 @@
     return end_offset - begin_offset;
   }
 
-  InstructionSet GetInstructionSet() {
+  InstructionSet GetOatInstructionSet() {
     return oat_file_.GetOatHeader().GetInstructionSet();
   }
 
@@ -1260,6 +1265,7 @@
   const OatFile& oat_file_;
   const std::vector<const OatFile::OatDexFile*> oat_dex_files_;
   const OatDumperOptions* options_;
+  InstructionSet instruction_set_;
   std::set<uintptr_t> offsets_;
   Disassembler* disassembler_;
 };
@@ -1521,7 +1527,8 @@
 
   const void* GetQuickOatCodeBegin(mirror::ArtMethod* m)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const void* quick_code = m->GetEntryPointFromQuickCompiledCode();
+    const void* quick_code = m->GetEntryPointFromQuickCompiledCodePtrSize(
+        InstructionSetPointerSize(oat_dumper_->GetOatInstructionSet()));
     if (Runtime::Current()->GetClassLinker()->IsQuickResolutionStub(quick_code)) {
       quick_code = oat_dumper_->GetQuickOatCode(m);
     }
@@ -1622,11 +1629,14 @@
         }
       }
     } else if (obj->IsArtMethod()) {
+      const size_t image_pointer_size = InstructionSetPointerSize(
+          state->oat_dumper_->GetOatInstructionSet());
       mirror::ArtMethod* method = obj->AsArtMethod();
       if (method->IsNative()) {
         // TODO: portable dumping.
-        DCHECK(method->GetNativeGcMap() == nullptr) << PrettyMethod(method);
-        DCHECK(method->GetMappingTable() == nullptr) << PrettyMethod(method);
+        DCHECK(method->GetNativeGcMapPtrSize(image_pointer_size) == nullptr)
+            << PrettyMethod(method);
+        DCHECK(method->GetMappingTable(image_pointer_size) == nullptr) << PrettyMethod(method);
         bool first_occurrence;
         const void* quick_oat_code = state->GetQuickOatCodeBegin(method);
         uint32_t quick_oat_code_size = state->GetQuickOatCodeSize(method);
@@ -1634,33 +1644,36 @@
         if (first_occurrence) {
           state->stats_.native_to_managed_code_bytes += quick_oat_code_size;
         }
-        if (quick_oat_code != method->GetEntryPointFromQuickCompiledCode()) {
+        if (quick_oat_code != method->GetEntryPointFromQuickCompiledCodePtrSize(
+            image_pointer_size)) {
           indent_os << StringPrintf("OAT CODE: %p\n", quick_oat_code);
         }
       } else if (method->IsAbstract() || method->IsCalleeSaveMethod() ||
           method->IsResolutionMethod() || method->IsImtConflictMethod() ||
           method->IsImtUnimplementedMethod() || method->IsClassInitializer()) {
-        DCHECK(method->GetNativeGcMap() == nullptr) << PrettyMethod(method);
-        DCHECK(method->GetMappingTable() == nullptr) << PrettyMethod(method);
+        DCHECK(method->GetNativeGcMapPtrSize(image_pointer_size) == nullptr)
+            << PrettyMethod(method);
+        DCHECK(method->GetMappingTable(image_pointer_size) == nullptr) << PrettyMethod(method);
       } else {
         const DexFile::CodeItem* code_item = method->GetCodeItem();
         size_t dex_instruction_bytes = code_item->insns_size_in_code_units_ * 2;
         state->stats_.dex_instruction_bytes += dex_instruction_bytes;
 
         bool first_occurrence;
-        size_t gc_map_bytes = state->ComputeOatSize(method->GetNativeGcMap(), &first_occurrence);
+        size_t gc_map_bytes = state->ComputeOatSize(
+            method->GetNativeGcMapPtrSize(image_pointer_size), &first_occurrence);
         if (first_occurrence) {
           state->stats_.gc_map_bytes += gc_map_bytes;
         }
 
         size_t pc_mapping_table_bytes =
-            state->ComputeOatSize(method->GetMappingTable(), &first_occurrence);
+            state->ComputeOatSize(method->GetMappingTable(image_pointer_size), &first_occurrence);
         if (first_occurrence) {
           state->stats_.pc_mapping_table_bytes += pc_mapping_table_bytes;
         }
 
         size_t vmap_table_bytes =
-            state->ComputeOatSize(method->GetVmapTable(), &first_occurrence);
+            state->ComputeOatSize(method->GetVmapTable(image_pointer_size), &first_occurrence);
         if (first_occurrence) {
           state->stats_.vmap_table_bytes += vmap_table_bytes;
         }
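
The oatdump changes above stop assuming the host's sizeof(void*) and instead derive the pointer size from the oat file's own instruction set, so ArtMethod entry points and tables are read with the target layout even when, say, a 64-bit host dumps a 32-bit image. The mapping relied on is sketched below; the enum and function are illustrative stand-ins for ART's InstructionSet and InstructionSetPointerSize.

    // Hedged stand-in for the instruction-set to pointer-size mapping:
    // 32-bit targets store 4-byte method pointers, 64-bit targets 8-byte ones.
    #include <cstddef>
    #include <iostream>

    enum class Isa { kArm, kArm64, kX86, kX86_64, kMips, kMips64 };

    constexpr size_t PointerSizeFor(Isa isa) {
      return (isa == Isa::kArm64 || isa == Isa::kX86_64 || isa == Isa::kMips64) ? 8u : 4u;
    }

    int main() {
      std::cout << "arm image pointer size:    " << PointerSizeFor(Isa::kArm) << "\n";
      std::cout << "x86_64 image pointer size: " << PointerSizeFor(Isa::kX86_64) << "\n";
      return 0;
    }
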
diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h
index 5bece18..02c0982 100644
--- a/runtime/arch/mips/asm_support_mips.h
+++ b/runtime/arch/mips/asm_support_mips.h
@@ -19,8 +19,8 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 48
+#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 48
 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 64
 
 #endif  // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_H_
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 4824857..44feee6 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -26,44 +26,48 @@
     /* Deliver an exception pending on a thread */
     .extern artDeliverPendingExceptionFromCode
 
+#define ARG_SLOT_SIZE   32    // space for a0-a3 plus 4 more words
+
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveAll)
-     * Callee-save: $s0-$s8 + $gp + $ra, 11 total + 1 word padding + 4 open words for args
-     * Clobbers $t0 and $gp
+     * Callee-save: $s0-$s8 + $gp + $ra, 11 total + 1 word for Method*
+     * Clobbers $t0 and $sp
+     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
+     * Reserves FRAME_SIZE_SAVE_ALL_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
      */
 .macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-    addiu  $sp, $sp, -64
-    .cfi_adjust_cfa_offset 64
+    addiu  $sp, $sp, -48
+    .cfi_adjust_cfa_offset 48
 
      // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 64)
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 48)
 #error "SAVE_ALL_CALLEE_SAVE_FRAME(MIPS) size not as expected."
 #endif
 
-    sw     $ra, 60($sp)
-    .cfi_rel_offset 31, 60
-    sw     $s8, 56($sp)
-    .cfi_rel_offset 30, 56
-    sw     $gp, 52($sp)
-    .cfi_rel_offset 28, 52
-    sw     $s7, 48($sp)
-    .cfi_rel_offset 23, 48
-    sw     $s6, 44($sp)
-    .cfi_rel_offset 22, 44
-    sw     $s5, 40($sp)
-    .cfi_rel_offset 21, 40
-    sw     $s4, 36($sp)
-    .cfi_rel_offset 20, 36
-    sw     $s3, 32($sp)
-    .cfi_rel_offset 19, 32
-    sw     $s2, 28($sp)
-    .cfi_rel_offset 18, 28
-    sw     $s1, 24($sp)
-    .cfi_rel_offset 17, 24
-    sw     $s0, 20($sp)
-    .cfi_rel_offset 16, 20
-    # 1 word for alignment, 4 open words for args $a0-$a3, bottom will hold Method*
+    sw     $ra, 44($sp)
+    .cfi_rel_offset 31, 44
+    sw     $s8, 40($sp)
+    .cfi_rel_offset 30, 40
+    sw     $gp, 36($sp)
+    .cfi_rel_offset 28, 36
+    sw     $s7, 32($sp)
+    .cfi_rel_offset 23, 32
+    sw     $s6, 28($sp)
+    .cfi_rel_offset 22, 28
+    sw     $s5, 24($sp)
+    .cfi_rel_offset 21, 24
+    sw     $s4, 20($sp)
+    .cfi_rel_offset 20, 20
+    sw     $s3, 16($sp)
+    .cfi_rel_offset 19, 16
+    sw     $s2, 12($sp)
+    .cfi_rel_offset 18, 12
+    sw     $s1, 8($sp)
+    .cfi_rel_offset 17, 8
+    sw     $s0, 4($sp)
+    .cfi_rel_offset 16, 4
+    # 1 word for holding Method*
 
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
@@ -71,42 +75,47 @@
     lw $t0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
+    addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
+    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kRefsOnly). Restoration assumes non-moving GC.
      * Does not include rSUSPEND or rSELF
-     * callee-save: $s2-$s8 + $gp + $ra, 9 total + 3 words padding + 4 open words for args
+     * callee-save: $s2-$s8 + $gp + $ra, 9 total + 2 words padding + 1 word to hold Method*
+     * Clobbers $t0 and $sp
+     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
+     * Reserves FRAME_SIZE_REFS_ONLY_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
      */
 .macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
-    addiu  $sp, $sp, -64
-    .cfi_adjust_cfa_offset 64
+    addiu  $sp, $sp, -48
+    .cfi_adjust_cfa_offset 48
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 64)
+#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 48)
 #error "REFS_ONLY_CALLEE_SAVE_FRAME(MIPS) size not as expected."
 #endif
 
-    sw     $ra, 60($sp)
-    .cfi_rel_offset 31, 60
-    sw     $s8, 56($sp)
-    .cfi_rel_offset 30, 56
-    sw     $gp, 52($sp)
-    .cfi_rel_offset 28, 52
-    sw     $s7, 48($sp)
-    .cfi_rel_offset 23, 48
-    sw     $s6, 44($sp)
-    .cfi_rel_offset 22, 44
-    sw     $s5, 40($sp)
-    .cfi_rel_offset 21, 40
-    sw     $s4, 36($sp)
-    .cfi_rel_offset 20, 36
-    sw     $s3, 32($sp)
-    .cfi_rel_offset 19, 32
-    sw     $s2, 28($sp)
-    .cfi_rel_offset 18, 28
-    # 3 words for alignment and extra args, 4 open words for args $a0-$a3, bottom will hold Method*
+    sw     $ra, 44($sp)
+    .cfi_rel_offset 31, 44
+    sw     $s8, 40($sp)
+    .cfi_rel_offset 30, 40
+    sw     $gp, 36($sp)
+    .cfi_rel_offset 28, 36
+    sw     $s7, 32($sp)
+    .cfi_rel_offset 23, 32
+    sw     $s6, 28($sp)
+    .cfi_rel_offset 22, 28
+    sw     $s5, 24($sp)
+    .cfi_rel_offset 21, 24
+    sw     $s4, 20($sp)
+    .cfi_rel_offset 20, 20
+    sw     $s3, 16($sp)
+    .cfi_rel_offset 19, 16
+    sw     $s2, 12($sp)
+    .cfi_rel_offset 18, 12
+    # 2 words for alignment and bottom word will hold Method*
 
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
@@ -114,61 +123,47 @@
     lw $t0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
+    addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
+    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
 .endm
 
 .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
-    lw     $ra, 60($sp)
+    addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
+    .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
+    lw     $ra, 44($sp)
     .cfi_restore 31
-    lw     $s8, 56($sp)
+    lw     $s8, 40($sp)
     .cfi_restore 30
-    lw     $gp, 52($sp)
+    lw     $gp, 36($sp)
     .cfi_restore 28
-    lw     $s7, 48($sp)
+    lw     $s7, 32($sp)
     .cfi_restore 23
-    lw     $s6, 44($sp)
+    lw     $s6, 28($sp)
     .cfi_restore 22
-    lw     $s5, 40($sp)
+    lw     $s5, 24($sp)
     .cfi_restore 21
-    lw     $s4, 36($sp)
+    lw     $s4, 20($sp)
     .cfi_restore 20
-    lw     $s3, 32($sp)
+    lw     $s3, 16($sp)
     .cfi_restore 19
-    lw     $s2, 28($sp)
+    lw     $s2, 12($sp)
     .cfi_restore 18
-    addiu  $sp, $sp, 64
-    .cfi_adjust_cfa_offset -64
+    addiu  $sp, $sp, 48
+    .cfi_adjust_cfa_offset -48
 .endm
 
 .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
-    lw     $ra, 60($sp)
-    .cfi_restore 31
-    lw     $s8, 56($sp)
-    .cfi_restore 30
-    lw     $gp, 52($sp)
-    .cfi_restore 28
-    lw     $s7, 48($sp)
-    .cfi_restore 23
-    lw     $s6, 44($sp)
-    .cfi_restore 22
-    lw     $s5, 40($sp)
-    .cfi_restore 21
-    lw     $s4, 36($sp)
-    .cfi_restore 20
-    lw     $s3, 32($sp)
-    .cfi_restore 19
-    lw     $s2, 28($sp)
-    .cfi_restore 18
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     jr     $ra
-    addiu  $sp, $sp, 64
-    .cfi_adjust_cfa_offset -64
+    nop
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes non-moving GC.
+     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
      * callee-save: $a1-$a3, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
     addiu  $sp, $sp, -64
     .cfi_adjust_cfa_offset 64
 
@@ -202,16 +197,48 @@
     sw     $a1, 4($sp)
     .cfi_rel_offset 5, 4
     # bottom will hold Method*
+.endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes non-moving GC.
+     * callee-save: $a1-$a3, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
+     * Clobbers $t0 and $sp
+     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
+     * Reserves FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
+     */
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
     THIS_LOAD_REQUIRES_READ_BARRIER
     lw $t0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
+    addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
+    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
+.endm
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes non-moving GC.
+     * callee-save: $a1-$a3, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
+     * Clobbers $sp
+     * Uses $a0 as the Method* and stores it at the bottom of the stack.
+     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
+     * Reserves FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
+     */
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+    sw $a0, 0($sp)                                # Place Method* at bottom of stack.
+    sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
+    addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
+    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
 .endm
 
 .macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
+    .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
     lw     $ra, 60($sp)
     .cfi_restore 31
     lw     $s8, 56($sp)
@@ -444,21 +471,15 @@
     .extern \cxx_name
 ENTRY \c_name
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  # save callee saves in case allocation triggers GC
-    lw    $a2, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE($sp)                    # pass caller Method*
-    move  $t0, $sp                        # save $sp
-    addiu $sp, $sp, -32                   # make space for extra args
-    .cfi_adjust_cfa_offset 32
+    lw    $a2, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE+ARG_SLOT_SIZE($sp)    # pass caller Method*
+    addiu $t0, $sp, ARG_SLOT_SIZE         # save $sp (remove arg slots)
     move  $a3, rSELF                      # pass Thread::Current
-    .cfi_rel_offset 28, 12
     jal   \cxx_name                       # (method_idx, this, caller, Thread*, $sp)
     sw    $t0, 16($sp)                    # pass $sp
-    addiu $sp, $sp, 32                    # release out args
-    .cfi_adjust_cfa_offset -32
     move  $a0, $v0                        # save target Method*
-    move  $t9, $v1                        # save $v0->code_
     RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     beqz  $v0, 1f
-    nop
+    move  $t9, $v1                        # save $v0->code_
     jr    $t9
     nop
 1:
@@ -500,10 +521,10 @@
     .cfi_def_cfa_register 30
     move  $s1, $a3              # move managed thread pointer into s1
     addiu $s0, $zero, SUSPEND_CHECK_INTERVAL  # reset s0 to suspend check interval
-    addiu $t0, $a2, 16          # create space for method pointer in frame
-    srl   $t0, $t0, 4           # shift the frame size right 4
-    sll   $t0, $t0, 4           # shift the frame size left 4 to align to 16 bytes
-    subu  $sp, $sp, $t0         # reserve stack space for argument array
+    addiu $t0, $a2, 4           # create space for method pointer in frame.
+    subu  $t0, $sp, $t0         # reserve & align *stack* to 16 bytes:
+    srl   $t0, $t0, 4           # native calling convention only aligns to 8B,
+    sll   $sp, $t0, 4           # so we have to ensure ART 16B alignment ourselves.
     addiu $a0, $sp, 4           # pass stack pointer + method ptr as dest for memcpy
     jal   memcpy                # (dest, src, bytes)
     addiu $sp, $sp, -16         # make space for argument slots for memcpy
@@ -548,8 +569,8 @@
      */
     .extern artHandleFillArrayDataFromCode
 ENTRY art_quick_handle_fill_data
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
-    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)                   # pass referrer's Method*
+    lw     $a2, 0($sp)                    # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case exception allocation triggers GC
     jal    artHandleFillArrayDataFromCode # (payload offset, Array*, method, Thread*)
     move   $a3, rSELF                     # pass Thread::Current
     RETURN_IF_ZERO
@@ -562,7 +583,7 @@
 ENTRY art_quick_lock_object
     beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME      # save callee saves in case we block
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case we block
     jal     artLockObjectFromCode         # (Object* obj, Thread*)
     move    $a1, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -575,7 +596,7 @@
 ENTRY art_quick_unlock_object
     beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case exception allocation triggers GC
     jal     artUnlockObjectFromCode   # (Object* obj, Thread*)
     move    $a1, rSELF                # pass Thread::Current
     RETURN_IF_ZERO
@@ -594,7 +615,8 @@
     sw     $a1, 4($sp)
     sw     $a0, 0($sp)
     jal    artIsAssignableFromCode
-    nop
+    addiu  $sp, $sp, -16             # reserve argument slots on the stack
+    addiu  $sp, $sp, 16
     beqz   $v0, .Lthrow_class_cast_exception
     lw     $ra, 12($sp)
     jr     $ra
@@ -670,7 +692,8 @@
     move   $a1, $t1
     move   $a0, $t0
     jal    artIsAssignableFromCode  # (Class*, Class*)
-    nop
+    addiu $sp, $sp, -16     # reserve argument slots on the stack
+    addiu $sp, $sp, 16
     lw     $ra, 28($sp)
     lw     $t9, 12($sp)
     lw     $a2, 8($sp)
@@ -694,7 +717,7 @@
      */
     .extern artInitializeStaticStorageFromCode
 ENTRY art_quick_initialize_static_storage
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME            # save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME           # save callee saves in case of GC
     # artInitializeStaticStorageFromCode(uint32_t type_idx, Method* referrer, Thread*)
     jal     artInitializeStaticStorageFromCode
     move    $a2, rSELF                          # pass Thread::Current
@@ -706,7 +729,7 @@
      */
     .extern artInitializeTypeFromCode
 ENTRY art_quick_initialize_type
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME           # save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          # save callee saves in case of GC
     # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*)
     jal     artInitializeTypeFromCode
     move    $a2, rSELF                         # pass Thread::Current
@@ -719,7 +742,7 @@
      */
     .extern artInitializeTypeAndVerifyAccessFromCode
 ENTRY art_quick_initialize_type_and_verify_access
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME           # save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          # save callee saves in case of GC
     # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*)
     jal     artInitializeTypeAndVerifyAccessFromCode
     move    $a2, rSELF                         # pass Thread::Current
@@ -730,8 +753,8 @@
      */
     .extern artGetBooleanStaticFromCode
 ENTRY art_quick_get_boolean_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a1, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artGetBooleanStaticFromCode   # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -741,8 +764,8 @@
      */
     .extern artGetByteStaticFromCode
 ENTRY art_quick_get_byte_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a1, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artGetByteStaticFromCode      # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -753,8 +776,8 @@
      */
     .extern artGetCharStaticFromCode
 ENTRY art_quick_get_char_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a1, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artGetCharStaticFromCode      # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -764,8 +787,8 @@
      */
     .extern artGetShortStaticFromCode
 ENTRY art_quick_get_short_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a1, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artGetShortStaticFromCode     # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -776,8 +799,8 @@
      */
     .extern artGet32StaticFromCode
 ENTRY art_quick_get32_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a1, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artGet32StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -788,8 +811,8 @@
      */
     .extern artGet64StaticFromCode
 ENTRY art_quick_get64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a1, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artGet64StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -800,8 +823,8 @@
      */
     .extern artGetObjStaticFromCode
 ENTRY art_quick_get_obj_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a1, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artGetObjStaticFromCode       # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -812,8 +835,8 @@
      */
     .extern artGetBooleanInstanceFromCode
 ENTRY art_quick_get_boolean_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a2, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artGetBooleanInstanceFromCode # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -823,8 +846,8 @@
      */
     .extern artGetByteInstanceFromCode
 ENTRY art_quick_get_byte_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a2, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artGetByteInstanceFromCode    # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -835,8 +858,8 @@
      */
     .extern artGetCharInstanceFromCode
 ENTRY art_quick_get_char_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a2, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artGetCharInstanceFromCode    # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -846,9 +869,9 @@
      */
     .extern artGetShortInstanceFromCode
 ENTRY art_quick_get_short_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
-    jal    artGetShortInstanceFromCode      # (field_idx, Object*, referrer, Thread*)
+    lw     $a2, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    jal    artGetShortInstanceFromCode   # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_short_instance
@@ -858,11 +881,10 @@
      */
     .extern artGet32InstanceFromCode
 ENTRY art_quick_get32_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a2, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    jal    artGet32InstanceFromCode      # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
-    jal    artGet32InstanceFromCode      # (field_idx, Object*, referrer, Thread*, $sp)
-    sw     $sp, 16($sp)                  # pass $sp
     RETURN_IF_NO_EXCEPTION
 END art_quick_get32_instance
 
@@ -871,11 +893,10 @@
      */
     .extern artGet64InstanceFromCode
 ENTRY art_quick_get64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a2, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    jal    artGet64InstanceFromCode      # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
-    jal    artGet64InstanceFromCode      # (field_idx, Object*, referrer, Thread*, $sp)
-    sw     $sp, 16($sp)                  # pass $sp
     RETURN_IF_NO_EXCEPTION
 END art_quick_get64_instance
 
@@ -884,8 +905,8 @@
      */
     .extern artGetObjInstanceFromCode
 ENTRY art_quick_get_obj_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a2, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artGetObjInstanceFromCode     # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -896,8 +917,8 @@
      */
     .extern artSet8StaticFromCode
 ENTRY art_quick_set8_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a2, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artSet8StaticFromCode         # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -908,8 +929,8 @@
      */
     .extern artSet16StaticFromCode
 ENTRY art_quick_set16_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a2, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artSet16StaticFromCode        # (field_idx, new_val, referrer, Thread*, $sp)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -920,8 +941,8 @@
      */
     .extern artSet32StaticFromCode
 ENTRY art_quick_set32_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a2, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artSet32StaticFromCode        # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -932,8 +953,8 @@
      */
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a1, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artSet64StaticFromCode        # (field_idx, referrer, new_val, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
     RETURN_IF_ZERO
@@ -944,10 +965,10 @@
      */
     .extern artSetObjStaticFromCode
 ENTRY art_quick_set_obj_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
-    jal    artSetObjStaticFromCode       # (field_idx, new_val, referrer, Thread*)
+    lw     $a2, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     move   $a3, rSELF                    # pass Thread::Current
+    jal    artSetObjStaticFromCode       # (field_idx, new_val, referrer, Thread*)
     RETURN_IF_ZERO
 END art_quick_set_obj_static
 
@@ -956,9 +977,9 @@
      */
     .extern artSet8InstanceFromCode
 ENTRY art_quick_set8_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
-    jal    artSet8InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
+    lw     $a3, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    jal    artSet8InstanceFromCode       # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_set8_instance
@@ -968,8 +989,8 @@
      */
     .extern artSet16InstanceFromCode
 ENTRY art_quick_set16_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a3, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artSet16InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
     RETURN_IF_ZERO
@@ -980,8 +1001,8 @@
      */
     .extern artSet32InstanceFromCode
 ENTRY art_quick_set32_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a3, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artSet32InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
     RETURN_IF_ZERO
@@ -992,11 +1013,11 @@
      */
     .extern artSet64InstanceFromCode
 ENTRY art_quick_set64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $t0, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # load referrer's Method*
+    lw     $t1, 0($sp)                   # load referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     sw     rSELF, 20($sp)                # pass Thread::Current
     jal    artSet64InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
-    sw     $t0, 16($sp)                  # pass referrer's Method*
+    sw     $t1, 16($sp)                  # pass referrer's Method*
     RETURN_IF_ZERO
 END art_quick_set64_instance
 
@@ -1005,8 +1026,8 @@
      */
     .extern artSetObjInstanceFromCode
 ENTRY art_quick_set_obj_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    lw     $a3, 0($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artSetObjInstanceFromCode     # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
     RETURN_IF_ZERO
@@ -1020,7 +1041,7 @@
      */
     .extern artResolveStringFromCode
 ENTRY art_quick_resolve_string
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
     # artResolveStringFromCode(Method* referrer, uint32_t string_idx, Thread*)
     jal     artResolveStringFromCode
     move    $a2, rSELF                # pass Thread::Current
@@ -1032,7 +1053,7 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
     jal     \entrypoint
     move    $a2, rSELF                # pass Thread::Current
     \return
@@ -1042,7 +1063,7 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
     jal     \entrypoint
     move    $a3, rSELF                # pass Thread::Current
     \return
@@ -1075,13 +1096,12 @@
      */
     .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
-    sw      $a0, 0($sp)            # place proxy method at bottom of frame
-    move    $a2, rSELF             # pass Thread::Current
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    move    $a2, rSELF                  # pass Thread::Current
     jal     artQuickProxyInvokeHandler  # (Method* proxy method, receiver, Thread*, SP)
-    move    $a3, $sp               # pass $sp
+    addiu   $a3, $sp, ARG_SLOT_SIZE     # pass $sp (remove arg slots)
     lw      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     bnez    $t0, 1f
     mtc1    $v0, $f0               # place return value to FP return value
     jr      $ra
@@ -1107,11 +1127,11 @@
     .extern artQuickResolutionTrampoline
 ENTRY art_quick_resolution_trampoline
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
-    move    $a2, rSELF             # pass Thread::Current
+    move    $a2, rSELF                    # pass Thread::Current
     jal     artQuickResolutionTrampoline  # (Method* called, receiver, Thread*, SP)
-    move    $a3, $sp               # pass $sp
+    addiu   $a3, $sp, ARG_SLOT_SIZE       # pass $sp (remove arg slots)
     beqz    $v0, 1f
-    lw      $a0, 0($sp)            # load resolved method to $a0
+    lw      $a0, ARG_SLOT_SIZE($sp)       # load resolved method to $a0
     RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     move    $t9, $v0               # code pointer must be in $t9 to generate the global pointer
     jr      $v0                    # tail call to method
@@ -1121,16 +1141,75 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
-UNIMPLEMENTED art_quick_generic_jni_trampoline
+    .extern artQuickGenericJniTrampoline
+    .extern artQuickGenericJniEndTrampoline
+ENTRY art_quick_generic_jni_trampoline
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    move    $s8, $sp               # save $sp to $s8
+    move    $s3, $gp               # save $gp to $s3
+
+    # prepare for call to artQuickGenericJniTrampoline(Thread*, SP)
+    move    $a0, rSELF                     # pass Thread::Current
+    addiu   $a1, $sp, ARG_SLOT_SIZE        # save $sp (remove arg slots)
+    jal     artQuickGenericJniTrampoline   # (Thread*, SP)
+    addiu   $sp, $sp, -5120                # reserve space on the stack
+
+    # The C call will have registered the complete save-frame on success.
+    # The result of the call is:
+    # v0: ptr to native code, 0 on error.
+    # v1: ptr to the bottom of the used area of the alloca, can restore stack till here.
+    beq     $v0, $zero, 1f         # check entry error
+    move    $t9, $v0               # save the code ptr
+    move    $sp, $v1               # release part of the alloca
+
+    # Load parameters from stack into registers
+    lw      $a0,   0($sp)
+    lw      $a1,   4($sp)
+    lw      $a2,   8($sp)
+
+    # Load FPRs the same as GPRs. Look at BuildNativeCallFrameStateMachine.
+    jalr    $t9                    # native call
+    lw      $a3,  12($sp)
+    addiu   $sp, $sp, 16           # remove arg slots
+
+    move    $gp, $s3               # restore $gp from $s3
+
+    # result sign extension is handled in C code
+    # prepare for call to artQuickGenericJniEndTrampoline(Thread*, result, result_f)
+    move    $a0, rSELF             # pass Thread::Current
+    move    $a2, $v0               # pass result
+    move    $a3, $v1
+    addiu   $sp, $sp, -24          # reserve arg slots
+    jal     artQuickGenericJniEndTrampoline
+    s.d     $f0, 16($sp)           # pass result_f
+    addiu   $sp, $sp, 24           # remove arg slots
+
+    lw      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
+    bne     $t0, $zero, 2f         # check for pending exceptions
+    move    $sp, $s8               # tear down the alloca
+
+    # tear down the callee-save frame
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+
+    mtc1    $v0, $f0               # place return value to FP return value
+    jr      $ra
+    mtc1    $v1, $f1               # place return value to FP return value
+
+1:
+    move    $sp, $s8               # tear down the alloca
+2:
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    DELIVER_PENDING_EXCEPTION
+END art_quick_generic_jni_trampoline
 
     .extern artQuickToInterpreterBridge
 ENTRY art_quick_to_interpreter_bridge
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
-    move    $a1, rSELF             # pass Thread::Current
-    jal     artQuickToInterpreterBridge    # (Method* method, Thread*, SP)
-    move    $a2, $sp               # pass $sp
+    move    $a1, rSELF                          # pass Thread::Current
+    jal     artQuickToInterpreterBridge         # (Method* method, Thread*, SP)
+    addiu   $a2, $sp, ARG_SLOT_SIZE             # pass $sp (remove arg slots)
     lw      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     bnez    $t0, 1f
     mtc1    $v0, $f0               # place return value to FP return value
     jr      $ra
@@ -1146,17 +1225,12 @@
     .extern artInstrumentationMethodExitFromCode
 ENTRY art_quick_instrumentation_entry
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
-    move     $t0, $sp       # remember bottom of caller's frame
-    addiu    $sp, $sp, -32  # space for saved a0, pad (2 words), arguments (4 words)
-    .cfi_adjust_cfa_offset 32
-    sw       $a0, 28($sp)   # save arg0
+    sw       $a0, 28($sp)   # save arg0 in free arg slot
     move     $a3, $ra       # pass $ra
     jal      artInstrumentationMethodEntryFromCode  # (Method*, Object*, Thread*, LR)
     move     $a2, rSELF     # pass Thread::Current
     move     $t9, $v0       # $t9 holds reference to code
-    lw       $a0, 28($sp)   # restore arg0
-    addiu    $sp, $sp, 32   # remove args
-    .cfi_adjust_cfa_offset -32
+    lw       $a0, 28($sp)   # restore arg0 from free arg slot
     RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     jalr     $t9            # call method
     nop
@@ -1168,32 +1242,33 @@
     addiu    $t9, $ra, 4    # put current address into $t9 to rebuild $gp
     .cpload  $t9
     move     $ra, $zero     # link register is to here, so clobber with 0 for later checks
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
-    move     $t0, $sp       # remember bottom of caller's frame
-    addiu    $sp, $sp, -48  # save return values and set up args
-    .cfi_adjust_cfa_offset 48
-    sw       $v0, 32($sp)
+
+    addiu    $sp, $sp, -16  # allocate temp storage on the stack
+    .cfi_adjust_cfa_offset 16
+    sw       $v0, 12($sp)
     .cfi_rel_offset 2, 32
-    sw       $v1, 36($sp)
+    sw       $v1, 8($sp)
     .cfi_rel_offset 3, 36 
-    s.s      $f0, 40($sp)
-    s.s      $f1, 44($sp)
+    s.s      $f0, 4($sp)
+    s.s      $f1, 0($sp)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
     s.s      $f0, 16($sp)   # pass fpr result
     s.s      $f1, 20($sp)
     move     $a2, $v0       # pass gpr result
     move     $a3, $v1
-    move     $a1, $t0       # pass $sp
+    addiu    $a1, $sp, ARG_SLOT_SIZE   # pass $sp (remove arg slots)
     jal      artInstrumentationMethodExitFromCode  # (Thread*, SP, gpr_res, fpr_res)
     move     $a0, rSELF     # pass Thread::Current
     move     $t0, $v0       # set aside returned link register
     move     $ra, $v1       # set link register for deoptimization
-    lw       $v0, 32($sp)   # restore return values
-    lw       $v1, 36($sp)
-    l.s      $f0, 40($sp)
-    l.s      $f1, 44($sp)
+    addiu    $sp, $sp, ARG_SLOT_SIZE+FRAME_SIZE_REFS_ONLY_CALLEE_SAVE  # args slot + refs_only callee save frame
+    lw       $v0, 12($sp)   # restore return values
+    lw       $v1, 8($sp)
+    l.s      $f0, 4($sp)
+    l.s      $f1, 0($sp)
     jr       $t0            # return
-    addiu    $sp, $sp, 112  # 48 bytes of args + 64 bytes of callee save frame
-    .cfi_adjust_cfa_offset -112
+    addiu    $sp, $sp, 16   # remove temp storage from stack
+    .cfi_adjust_cfa_offset -16
 END art_quick_instrumentation_exit
 
     /*
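
Two stack conventions recur through the assembly above: every callee-save macro now appends ARG_SLOT_SIZE (32 bytes, the o32 home slots for $a0-$a3 plus four extra outgoing words) below the frame, and the invoke stub realigns $sp with an srl/sll pair. The snippet below restates that realignment in C++ purely as an illustration; the extra 4 bytes are the Method* slot mentioned in the stub's comment.

    // The srl/sll pair is "round down to a 16-byte boundary" after making room
    // for the argument array and the Method* word (values here are made up).
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    uintptr_t AlignSpDownForArgs(uintptr_t sp, uint32_t args_bytes) {
      uintptr_t needed = args_bytes + 4;                  // + 4 for the Method* slot
      uintptr_t new_sp = (sp - needed) & ~uintptr_t(15);  // srl 4 then sll 4
      assert((new_sp & 15) == 0);
      return new_sp;
    }

    int main() {
      std::printf("0x%lx\n",
                  static_cast<unsigned long>(AlignSpDownForArgs(0x7fff0000, 20)));
      return 0;
    }
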
diff --git a/runtime/arch/mips/quick_method_frame_info_mips.h b/runtime/arch/mips/quick_method_frame_info_mips.h
index 2a8bcf0..5fbffbc 100644
--- a/runtime/arch/mips/quick_method_frame_info_mips.h
+++ b/runtime/arch/mips/quick_method_frame_info_mips.h
@@ -40,8 +40,7 @@
 
 constexpr uint32_t MipsCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
   return RoundUp((POPCOUNT(MipsCalleeSaveCoreSpills(type)) /* gprs */ +
-                  (type == Runtime::kRefsAndArgs ? 0 : 3) + 1 /* Method* */) *
-                 kMipsPointerSize, kStackAlignment);
+                  1 /* Method* */) * kMipsPointerSize, kStackAlignment);
 }
 
 constexpr QuickMethodFrameInfo MipsCalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
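
With the argument slots moved out of the callee-save frames, the size formula above reduces to spilled GPRs plus the Method* slot, rounded up to the 16-byte stack alignment. The worked check below assumes the spill sets listed in the macro comments of quick_entrypoints_mips.S; it is a standalone sketch, not the real MipsCalleeSaveCoreSpills code.

    // Reproduces the three FRAME_SIZE_* values from the spill counts.
    #include <cstdio>

    constexpr unsigned RoundUp16(unsigned x) { return (x + 15u) & ~15u; }

    constexpr unsigned FrameSize(unsigned spilled_gprs) {
      return RoundUp16((spilled_gprs + 1u /* Method* */) * 4u /* kMipsPointerSize */);
    }

    int main() {
      std::printf("kSaveAll:     %u\n", FrameSize(11));  // $s0-$s8, $gp, $ra -> 48
      std::printf("kRefsOnly:    %u\n", FrameSize(9));   // $s2-$s8, $gp, $ra -> 48
      std::printf("kRefsAndArgs: %u\n", FrameSize(12));  // plus $a1-$a3      -> 64
      return 0;
    }
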
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index c310191..cb69817 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -97,7 +97,9 @@
         }
       }
     }
-    CHECK(!bad_mutexes_held);
+    if (gAborting == 0) {  // Avoid recursive aborts.
+      CHECK(!bad_mutexes_held);
+    }
   }
   // Don't record monitors as they are outside the scope of analysis. They may be inspected off of
   // the monitor list.
@@ -112,7 +114,7 @@
     return;
   }
   if (level_ != kMonitorLock) {
-    if (kDebugLocking && !gAborting) {
+    if (kDebugLocking && gAborting == 0) {  // Avoid recursive aborts.
       CHECK(self->GetHeldMutex(level_) == this) << "Unlocking on unacquired mutex: " << name_;
     }
     self->SetHeldMutex(level_, NULL);
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index 9d2d59c..821d613 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -53,7 +53,7 @@
 
   void CheckReferences(int* registers, int number_of_references, uint32_t native_pc_offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (GetMethod()->IsOptimized()) {
+    if (GetMethod()->IsOptimized(sizeof(void*))) {
       CheckOptimizedMethod(registers, number_of_references, native_pc_offset);
     } else {
       CheckQuickMethod(registers, number_of_references, native_pc_offset);
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 37c5f9c..6597235 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -794,7 +794,7 @@
 Elf_Word ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
     Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
     ::GetHashChain(size_t i, bool* ok) const {
-  if (i >= GetHashBucketNum()) {
+  if (i >= GetHashChainNum()) {
     *ok = false;
     return 0;
   }
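
The fix above bounds a chain index by the chain count rather than the bucket count: in the SysV .hash section nbucket and nchain are independent (nchain equals the number of symbols), so an index that is valid for chain[] can easily exceed nbucket. A simplified lookup loop that relies on exactly that bound is sketched below; the types and layout are reduced for illustration.

    // Reduced SysV hash lookup: after the first step through bucket[], every
    // index walks chain[], so the bound is nchain (the symbol count).
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct SysvHash {
      std::vector<uint32_t> bucket;  // nbucket entries
      std::vector<uint32_t> chain;   // nchain entries, one per symbol
    };

    // Returns the index of the first symbol accepted by `matches`, or 0 (STN_UNDEF).
    template <typename Pred>
    uint32_t Lookup(const SysvHash& h, uint32_t hash, Pred matches) {
      for (uint32_t i = h.bucket[hash % h.bucket.size()];
           i != 0 && i < h.chain.size();  // bounded by nchain, not nbucket
           i = h.chain[i]) {
        if (matches(i)) {
          return i;
        }
      }
      return 0;
    }

    int main() {
      SysvHash h{{1}, {0, 2, 0}};  // one bucket; symbols 1 and 2 share it
      uint32_t found = Lookup(h, /*hash*/ 1234u, [](uint32_t i) { return i == 2; });
      std::printf("found symbol index %u\n", found);  // prints 2
      return 0;
    }
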
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index d5493bd..54dbd8c 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -35,7 +35,7 @@
   if (instrumentation->IsDeoptimized(method)) {
     result = GetQuickToInterpreterBridge();
   } else {
-    result = instrumentation->GetQuickCodeFor(method);
+    result = instrumentation->GetQuickCodeFor(method, sizeof(void*));
     DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(result));
   }
   bool interpreter_entry = (result == GetQuickToInterpreterBridge());
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 0b7d382..93dc62a 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -919,17 +919,16 @@
   static constexpr bool kAlignLongOnStack = false;
   static constexpr bool kAlignDoubleOnStack = false;
 #elif defined(__mips__)
-  // TODO: These are all dummy values!
   static constexpr bool kNativeSoftFloatAbi = true;  // This is a hard float ABI.
-  static constexpr size_t kNumNativeGprArgs = 0;  // 6 arguments passed in GPRs.
-  static constexpr size_t kNumNativeFprArgs = 0;  // 8 arguments passed in FPRs.
+  static constexpr size_t kNumNativeGprArgs = 4;  // 4 arguments passed in GPRs.
+  static constexpr size_t kNumNativeFprArgs = 0;  // 0 arguments passed in FPRs.
 
   static constexpr size_t kRegistersNeededForLong = 2;
   static constexpr size_t kRegistersNeededForDouble = 2;
   static constexpr bool kMultiRegistersAligned = true;
   static constexpr bool kMultiRegistersWidened = true;
-  static constexpr bool kAlignLongOnStack = false;
-  static constexpr bool kAlignDoubleOnStack = false;
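+  // The MIPS o32 ABI requires 64-bit (long/double) arguments to be 8-byte aligned on the stack.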
+  static constexpr bool kAlignLongOnStack = true;
+  static constexpr bool kAlignDoubleOnStack = true;
 #elif defined(__i386__)
   // TODO: Check these!
   static constexpr bool kNativeSoftFloatAbi = false;  // Not using int registers for fp
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index ab3ec62..835485c 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -360,7 +360,8 @@
   // at the return PC address.
   if (true || kIsDebugBuild) {
     VLOG(signals) << "looking for dex pc for return pc " << std::hex << return_pc;
-    const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method_obj);
+    const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method_obj,
+                                                                                 sizeof(void*));
     uint32_t sought_offset = return_pc - reinterpret_cast<uintptr_t>(code);
     VLOG(signals) << "pc offset: " << std::hex << sought_offset;
   }
diff --git a/runtime/gc/space/valgrind_malloc_space-inl.h b/runtime/gc/space/valgrind_malloc_space-inl.h
index 793d798..ae8e892 100644
--- a/runtime/gc/space/valgrind_malloc_space-inl.h
+++ b/runtime/gc/space/valgrind_malloc_space-inl.h
@@ -126,6 +126,30 @@
           size_t kValgrindRedZoneBytes,
           bool kAdjustForRedzoneInAllocSize,
           bool kUseObjSizeForUsable>
+mirror::Object* ValgrindMallocSpace<S,
+                                    kValgrindRedZoneBytes,
+                                    kAdjustForRedzoneInAllocSize,
+                                    kUseObjSizeForUsable>::AllocThreadUnsafe(
+    Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out) {
+  size_t bytes_allocated;
+  size_t usable_size;
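+  // Reserve extra space so the allocation can be bracketed by red zones.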
+  void* obj_with_rdz = S::AllocThreadUnsafe(self, num_bytes + 2 * kValgrindRedZoneBytes,
+                                            &bytes_allocated, &usable_size);
+  if (obj_with_rdz == nullptr) {
+    return nullptr;
+  }
+
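+  // Shift the returned pointer past the leading red zone and adjust the reported sizes.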
+  return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes,
+                                             kUseObjSizeForUsable>(obj_with_rdz, num_bytes,
+                                                                   bytes_allocated, usable_size,
+                                                                   bytes_allocated_out,
+                                                                   usable_size_out);
+}
+
+template <typename S,
+          size_t kValgrindRedZoneBytes,
+          bool kAdjustForRedzoneInAllocSize,
+          bool kUseObjSizeForUsable>
 size_t ValgrindMallocSpace<S,
                            kValgrindRedZoneBytes,
                            kAdjustForRedzoneInAllocSize,
diff --git a/runtime/gc/space/valgrind_malloc_space.h b/runtime/gc/space/valgrind_malloc_space.h
index d102f49..707ea69 100644
--- a/runtime/gc/space/valgrind_malloc_space.h
+++ b/runtime/gc/space/valgrind_malloc_space.h
@@ -37,6 +37,9 @@
                                   size_t* usable_size) OVERRIDE;
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
                         size_t* usable_size) OVERRIDE;
+  mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                    size_t* usable_size) OVERRIDE
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE;
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 003e160..639b0f0 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -869,10 +869,10 @@
   ConfigureStubs(false, false);
 }
 
-const void* Instrumentation::GetQuickCodeFor(mirror::ArtMethod* method) const {
+const void* Instrumentation::GetQuickCodeFor(mirror::ArtMethod* method, size_t pointer_size) const {
   Runtime* runtime = Runtime::Current();
   if (LIKELY(!instrumentation_stubs_installed_)) {
-    const void* code = method->GetEntryPointFromQuickCompiledCode();
+    const void* code = method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
     DCHECK(code != nullptr);
     ClassLinker* class_linker = runtime->GetClassLinker();
     if (LIKELY(!class_linker->IsQuickResolutionStub(code) &&
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 369039d..effa9f7 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -200,7 +200,7 @@
   // Get the quick code for the given method. More efficient than asking the class linker as it
   // will short-cut to GetCode if instrumentation and static method resolution stubs aren't
   // installed.
-  const void* GetQuickCodeFor(mirror::ArtMethod* method) const
+  const void* GetQuickCodeFor(mirror::ArtMethod* method, size_t pointer_size) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ForceInterpretOnly() {
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index 62d17ab..b936511 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -199,17 +199,17 @@
   SetEntryPointFromPortableCompiledCode(reinterpret_cast<void*>(code_offset));
 }
 
-inline const uint8_t* ArtMethod::GetMappingTable() {
-  const void* code_pointer = GetQuickOatCodePointer();
+inline const uint8_t* ArtMethod::GetMappingTable(size_t pointer_size) {
+  const void* code_pointer = GetQuickOatCodePointer(pointer_size);
   if (code_pointer == nullptr) {
     return nullptr;
   }
-  return GetMappingTable(code_pointer);
+  return GetMappingTable(code_pointer, pointer_size);
 }
 
-inline const uint8_t* ArtMethod::GetMappingTable(const void* code_pointer) {
+inline const uint8_t* ArtMethod::GetMappingTable(const void* code_pointer, size_t pointer_size) {
   DCHECK(code_pointer != nullptr);
-  DCHECK(code_pointer == GetQuickOatCodePointer());
+  DCHECK_EQ(code_pointer, GetQuickOatCodePointer(pointer_size));
   uint32_t offset =
       reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].mapping_table_offset_;
   if (UNLIKELY(offset == 0u)) {
@@ -218,18 +218,18 @@
   return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
 }
 
-inline const uint8_t* ArtMethod::GetVmapTable() {
-  const void* code_pointer = GetQuickOatCodePointer();
+inline const uint8_t* ArtMethod::GetVmapTable(size_t pointer_size) {
+  const void* code_pointer = GetQuickOatCodePointer(pointer_size);
   if (code_pointer == nullptr) {
     return nullptr;
   }
-  return GetVmapTable(code_pointer);
+  return GetVmapTable(code_pointer, pointer_size);
 }
 
-inline const uint8_t* ArtMethod::GetVmapTable(const void* code_pointer) {
-  CHECK(!IsOptimized()) << "Unimplemented vmap table for optimized compiler";
+inline const uint8_t* ArtMethod::GetVmapTable(const void* code_pointer, size_t pointer_size) {
+  CHECK(!IsOptimized(pointer_size)) << "Unimplemented vmap table for optimized compiler";
   DCHECK(code_pointer != nullptr);
-  DCHECK(code_pointer == GetQuickOatCodePointer());
+  DCHECK_EQ(code_pointer, GetQuickOatCodePointer(pointer_size));
   uint32_t offset =
       reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].vmap_table_offset_;
   if (UNLIKELY(offset == 0u)) {
@@ -243,8 +243,8 @@
 }
 
 inline CodeInfo ArtMethod::GetOptimizedCodeInfo() {
-  DCHECK(IsOptimized());
-  const void* code_pointer = GetQuickOatCodePointer();
+  DCHECK(IsOptimized(sizeof(void*)));
+  const void* code_pointer = GetQuickOatCodePointer(sizeof(void*));
   DCHECK(code_pointer != nullptr);
   uint32_t offset =
       reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].vmap_table_offset_;
@@ -303,13 +303,14 @@
 }
 
 inline uintptr_t ArtMethod::NativeQuickPcOffset(const uintptr_t pc) {
-  const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
+  const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(
+      this, sizeof(void*));
   return pc - reinterpret_cast<uintptr_t>(code);
 }
 
 inline QuickMethodFrameInfo ArtMethod::GetQuickFrameInfo(const void* code_pointer) {
   DCHECK(code_pointer != nullptr);
-  DCHECK_EQ(code_pointer, GetQuickOatCodePointer());
+  DCHECK_EQ(code_pointer, GetQuickOatCodePointer(sizeof(void*)));
   return reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].frame_info_;
 }
 
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 3b4d5f3..4f5ca3f 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -165,9 +165,9 @@
     // Portable doesn't use the machine pc, we just use dex pc instead.
     return static_cast<uint32_t>(pc);
   }
-  const void* entry_point = GetQuickOatEntryPoint();
-  MappingTable table(
-      entry_point != nullptr ? GetMappingTable(EntryPointToCodePointer(entry_point)) : nullptr);
+  const void* entry_point = GetQuickOatEntryPoint(sizeof(void*));
+  MappingTable table(entry_point != nullptr ?
+      GetMappingTable(EntryPointToCodePointer(entry_point), sizeof(void*)) : nullptr);
   if (table.TotalSize() == 0) {
     // NOTE: Special methods (see Mir2Lir::GenSpecialCase()) have an empty mapping
     // but they have no suspend checks and, consequently, we never call ToDexPc() for them.
@@ -198,9 +198,9 @@
 }
 
 uintptr_t ArtMethod::ToNativeQuickPc(const uint32_t dex_pc, bool abort_on_failure) {
-  const void* entry_point = GetQuickOatEntryPoint();
-  MappingTable table(
-      entry_point != nullptr ? GetMappingTable(EntryPointToCodePointer(entry_point)) : nullptr);
+  const void* entry_point = GetQuickOatEntryPoint(sizeof(void*));
+  MappingTable table(entry_point != nullptr ?
+      GetMappingTable(EntryPointToCodePointer(entry_point), sizeof(void*)) : nullptr);
   if (table.TotalSize() == 0) {
     DCHECK_EQ(dex_pc, 0U);
     return 0;   // Special no mapping/pc == 0 case
@@ -320,13 +320,13 @@
   }
 }
 
-const void* ArtMethod::GetQuickOatEntryPoint() {
+const void* ArtMethod::GetQuickOatEntryPoint(size_t pointer_size) {
   if (IsPortableCompiled() || IsAbstract() || IsRuntimeMethod() || IsProxyMethod()) {
     return nullptr;
   }
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
-  const void* code = runtime->GetInstrumentation()->GetQuickCodeFor(this);
+  const void* code = runtime->GetInstrumentation()->GetQuickCodeFor(this, pointer_size);
   // On failure, instead of nullptr we get the quick-generic-jni-trampoline for native method
   // indicating the generic JNI, or the quick-to-interpreter-bridge (but not the trampoline)
   // for non-native methods.
@@ -340,7 +340,7 @@
 #ifndef NDEBUG
 uintptr_t ArtMethod::NativeQuickPcOffset(const uintptr_t pc, const void* quick_entry_point) {
   CHECK_NE(quick_entry_point, GetQuickToInterpreterBridge());
-  CHECK_EQ(quick_entry_point, Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this));
+  CHECK_EQ(quick_entry_point, Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this, sizeof(void*)));
   return pc - reinterpret_cast<uintptr_t>(quick_entry_point);
 }
 #endif
@@ -436,18 +436,32 @@
     return QuickMethodFrameInfo(kStackAlignment, 0u, 0u);
   }
   Runtime* runtime = Runtime::Current();
-  // For Proxy method we exclude direct method (there is only one direct method - constructor).
-  // Direct method is cloned from original java.lang.reflect.Proxy class together with code
-  // and as a result it is executed as usual quick compiled method without any stubs.
-  // So the frame info should be returned as it is a quick method not a stub.
-  if (UNLIKELY(IsAbstract()) || UNLIKELY(IsProxyMethod() && !IsDirect())) {
+
+  if (UNLIKELY(IsAbstract())) {
     return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
   }
+
+  // For proxy methods we add special handling for the direct method case (there is only one
+  // direct method - the constructor). The direct method is cloned from the original
+  // java.lang.reflect.Proxy class together with its code, so it is executed as a regular quick
+  // compiled method without any stubs, and the frame info should be returned as for a quick
+  // method, not a stub. However, if instrumentation stubs are installed,
+  // instrumentation->GetQuickCodeFor() returns artQuickProxyInvokeHandler instead of an oat
+  // code pointer, thus we have to add a special case here.
+  if (UNLIKELY(IsProxyMethod())) {
+    if (IsDirect()) {
+      CHECK(IsConstructor());
+      return GetQuickFrameInfo(EntryPointToCodePointer(GetEntryPointFromQuickCompiledCode()));
+    } else {
+      return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+    }
+  }
+
   if (UNLIKELY(IsRuntimeMethod())) {
     return runtime->GetRuntimeMethodFrameInfo(this);
   }
 
-  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(this);
+  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(this, sizeof(void*));
   ClassLinker* class_linker = runtime->GetClassLinker();
   // On failure, instead of nullptr we get the quick-generic-jni-trampoline for native method
   // indicating the generic JNI, or the quick-to-interpreter-bridge (but not the trampoline)
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 4a7831f..d292552 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -146,13 +146,13 @@
     SetAccessFlags(GetAccessFlags() | kAccPreverified);
   }
 
-  bool IsOptimized() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool IsOptimized(size_t pointer_size) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Temporary solution for detecting if a method has been optimized: the compiler
     // does not create a GC map. Instead, the vmap table contains the stack map
     // (as in stack_map.h).
-    return (GetEntryPointFromQuickCompiledCode() != nullptr)
-        && (GetQuickOatCodePointer() != nullptr)
-        && (GetNativeGcMap() == nullptr);
+    return GetEntryPointFromQuickCompiledCodePtrSize(pointer_size) != nullptr
+        && GetQuickOatCodePointer(pointer_size) != nullptr
+        && GetNativeGcMapPtrSize(pointer_size) == nullptr;
   }
 
   bool IsPortableCompiled() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -381,22 +381,25 @@
     return reinterpret_cast<const void*>(code);
   }
 
-  // Actual entry point pointer to compiled oat code or nullptr if method has no compiled code.
-  const void* GetQuickOatEntryPoint() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
+  // Actual entry point pointer to compiled oat code or nullptr.
+  const void* GetQuickOatEntryPoint(size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Actual pointer to compiled oat code or nullptr.
-  const void* GetQuickOatCodePointer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return EntryPointToCodePointer(GetQuickOatEntryPoint());
+  const void* GetQuickOatCodePointer(size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return EntryPointToCodePointer(GetQuickOatEntryPoint(pointer_size));
   }
 
   // Callers should wrap the uint8_t* in a MappingTable instance for convenient access.
-  const uint8_t* GetMappingTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  const uint8_t* GetMappingTable(const void* code_pointer)
+  const uint8_t* GetMappingTable(size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const uint8_t* GetMappingTable(const void* code_pointer, size_t pointer_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Callers should wrap the uint8_t* in a VmapTable instance for convenient access.
-  const uint8_t* GetVmapTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  const uint8_t* GetVmapTable(const void* code_pointer)
+  const uint8_t* GetVmapTable(size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const uint8_t* GetVmapTable(const void* code_pointer, size_t pointer_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   StackMap GetStackMap(uint32_t native_pc_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index a8d4308..72b696b 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -106,9 +106,7 @@
 };
 
 struct InlineIGetIPutData {
-  // The op_variant below is opcode-Instruction::IGET for IGETs and
-  // opcode-Instruction::IPUT for IPUTs. This is because the runtime
-  // doesn't know the OpSize enumeration.
+  // The op_variant below is DexMemAccessType but the runtime doesn't know that enumeration.
   uint16_t op_variant : 3;
   uint16_t method_is_static : 1;
   uint16_t object_arg : 4;
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 4408609..43714b9 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -125,7 +125,7 @@
     } else {
       return cur_shadow_frame_->GetVRegReference(0);
     }
-  } else if (m->IsOptimized()) {
+  } else if (m->IsOptimized(sizeof(void*))) {
     // TODO: Implement, currently only used for exceptions when jdwp is enabled.
     UNIMPLEMENTED(WARNING)
         << "StackVisitor::GetThisObject is unimplemented with the optimizing compiler";
@@ -153,9 +153,9 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably read registers without a context.
     DCHECK(m == GetMethod());
-    const void* code_pointer = m->GetQuickOatCodePointer();
+    const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
     DCHECK(code_pointer != nullptr);
-    const VmapTable vmap_table(m->GetVmapTable(code_pointer));
+    const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
     QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
     uint32_t vmap_offset;
     // TODO: IsInContext stops before spotting floating point registers.
@@ -207,9 +207,9 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably read registers without a context.
     DCHECK(m == GetMethod());
-    const void* code_pointer = m->GetQuickOatCodePointer();
+    const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
     DCHECK(code_pointer != nullptr);
-    const VmapTable vmap_table(m->GetVmapTable(code_pointer));
+    const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
     QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
     uint32_t vmap_offset_lo, vmap_offset_hi;
     // TODO: IsInContext stops before spotting floating point registers.
@@ -254,9 +254,9 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
     DCHECK(m == GetMethod());
-    const void* code_pointer = m->GetQuickOatCodePointer();
+    const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
     DCHECK(code_pointer != nullptr);
-    const VmapTable vmap_table(m->GetVmapTable(code_pointer));
+    const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
     QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
     uint32_t vmap_offset;
     // TODO: IsInContext stops before spotting floating point registers.
@@ -318,9 +318,9 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
     DCHECK(m == GetMethod());
-    const void* code_pointer = m->GetQuickOatCodePointer();
+    const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
     DCHECK(code_pointer != nullptr);
-    const VmapTable vmap_table(m->GetVmapTable(code_pointer));
+    const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
     QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
     uint32_t vmap_offset_lo, vmap_offset_hi;
     // TODO: IsInContext stops before spotting floating point registers.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index c769faf..163c11d 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -96,8 +96,8 @@
 void Thread::InitTlsEntryPoints() {
   // Insert a placeholder so we can easily tell if we call an unimplemented entry point.
   uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.interpreter_entrypoints);
-  uintptr_t* end = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(begin) +
-                                                sizeof(tlsPtr_.quick_entrypoints));
+  uintptr_t* end = reinterpret_cast<uintptr_t*>(
+      reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) + sizeof(tlsPtr_.quick_entrypoints));
   for (uintptr_t* it = begin; it != end; ++it) {
     *it = reinterpret_cast<uintptr_t>(UnimplementedEntryPoint);
   }
@@ -2136,9 +2136,9 @@
 
     // Process register map (which native and runtime methods don't have)
     if (!m->IsNative() && !m->IsRuntimeMethod() && !m->IsProxyMethod()) {
-      if (m->IsOptimized()) {
+      if (m->IsOptimized(sizeof(void*))) {
         Runtime* runtime = Runtime::Current();
-        const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m);
+        const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m, sizeof(void*));
         uintptr_t native_pc_offset = m->NativeQuickPcOffset(GetCurrentQuickFramePc(), entry_point);
         StackMap map = m->GetStackMap(native_pc_offset);
         MemoryRegion mask = map.GetStackMask();
@@ -2167,12 +2167,12 @@
                                    static_cast<size_t>(code_item->registers_size_));
         if (num_regs > 0) {
           Runtime* runtime = Runtime::Current();
-          const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m);
+          const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m, sizeof(void*));
           uintptr_t native_pc_offset = m->NativeQuickPcOffset(GetCurrentQuickFramePc(), entry_point);
           const uint8_t* reg_bitmap = map.FindBitMap(native_pc_offset);
           DCHECK(reg_bitmap != nullptr);
           const void* code_pointer = mirror::ArtMethod::EntryPointToCodePointer(entry_point);
-          const VmapTable vmap_table(m->GetVmapTable(code_pointer));
+          const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
           QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
           // For all dex registers in the bitmap
           DCHECK(cur_quick_frame != nullptr);
diff --git a/runtime/verifier/reg_type-inl.h b/runtime/verifier/reg_type-inl.h
index 480ed40..f445132 100644
--- a/runtime/verifier/reg_type-inl.h
+++ b/runtime/verifier/reg_type-inl.h
@@ -81,6 +81,9 @@
       return rhs.IsLongTypes();
     } else if (lhs.IsDoubleLo()) {
       return rhs.IsDoubleTypes();
+    } else if (lhs.IsConflict()) {
+      LOG(WARNING) << "RegType::AssignableFrom lhs is Conflict!";
+      return false;
     } else {
       CHECK(lhs.IsReferenceTypes())
           << "Unexpected register type in IsAssignableFrom: '"
diff --git a/test/431-optimizing-arith-shifts/expected.txt b/test/431-optimizing-arith-shifts/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/431-optimizing-arith-shifts/expected.txt
diff --git a/test/431-optimizing-arith-shifts/info.txt b/test/431-optimizing-arith-shifts/info.txt
new file mode 100644
index 0000000..14ff264
--- /dev/null
+++ b/test/431-optimizing-arith-shifts/info.txt
@@ -0,0 +1 @@
+Tests for shift operations.
diff --git a/test/431-optimizing-arith-shifts/src/Main.java b/test/431-optimizing-arith-shifts/src/Main.java
new file mode 100644
index 0000000..d8667c6
--- /dev/null
+++ b/test/431-optimizing-arith-shifts/src/Main.java
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void main(String[] args) {
+    shlInt();
+    shlLong();
+    shrInt();
+    shrLong();
+    ushrInt();
+    ushrLong();
+  }
+
+  private static void shlInt() {
+    expectEquals(48, $opt$ShlConst2(12));
+    expectEquals(12, $opt$ShlConst0(12));
+    expectEquals(-48, $opt$Shl(-12, 2));
+    expectEquals(1024, $opt$Shl(32, 5));
+
+    expectEquals(7, $opt$Shl(7, 0));
+    expectEquals(14, $opt$Shl(7, 1));
+    expectEquals(0, $opt$Shl(0, 30));
+
+    expectEquals(1073741824L, $opt$Shl(1, 30));
+    expectEquals(Integer.MIN_VALUE, $opt$Shl(1, 31));  // overflow
+    expectEquals(Integer.MIN_VALUE, $opt$Shl(1073741824, 1));  // overflow
+    expectEquals(1073741824, $opt$Shl(268435456, 2));
+
+    // Only the 5 lower bits should be used for shifting (& 0x1f).
+    expectEquals(7, $opt$Shl(7, 32));  // 32 & 0x1f = 0
+    expectEquals(14, $opt$Shl(7, 33));  // 33 & 0x1f = 1
+    expectEquals(32, $opt$Shl(1, 101));  // 101 & 0x1f = 5
+
+    expectEquals(Integer.MIN_VALUE, $opt$Shl(1, -1));  // -1 & 0x1f = 31
+    expectEquals(14, $opt$Shl(7, -31));  // -31 & 0x1f = 1
+    expectEquals(7, $opt$Shl(7, -32));  // -32 & 0x1f = 0
+    expectEquals(-536870912, $opt$Shl(7, -3));  // -3 & 0x1f = 29
+
+    expectEquals(Integer.MIN_VALUE, $opt$Shl(7, Integer.MAX_VALUE));
+    expectEquals(7, $opt$Shl(7, Integer.MIN_VALUE));
+  }
+
+  private static void shlLong() {
+    expectEquals(48L, $opt$ShlConst2(12L));
+    expectEquals(12L, $opt$ShlConst0(12L));
+    expectEquals(-48L, $opt$Shl(-12L, 2L));
+    expectEquals(1024L, $opt$Shl(32L, 5L));
+
+    expectEquals(7L, $opt$Shl(7L, 0L));
+    expectEquals(14L, $opt$Shl(7L, 1L));
+    expectEquals(0L, $opt$Shl(0L, 30L));
+
+    expectEquals(1073741824L, $opt$Shl(1L, 30L));
+    expectEquals(2147483648L, $opt$Shl(1L, 31L));
+    expectEquals(2147483648L, $opt$Shl(1073741824L, 1L));
+
+    // Long shifts can use up to 6 lower bits.
+    expectEquals(4294967296L, $opt$Shl(1L, 32L));
+    expectEquals(60129542144L, $opt$Shl(7L, 33L));
+    expectEquals(Long.MIN_VALUE, $opt$Shl(1L, 63L));  // overflow
+
+    // Only the 6 lower bits should be used for shifting (& 0x3f).
+    expectEquals(7L, $opt$Shl(7L, 64L));  // 64 & 0x3f = 0
+    expectEquals(14L, $opt$Shl(7L, 65L));  // 65 & 0x3f = 1
+    expectEquals(137438953472L, $opt$Shl(1L, 101L));  // 101 & 0x3f = 37
+
+    expectEquals(Long.MIN_VALUE, $opt$Shl(1L, -1L));  // -1 & 0x3f = 63
+    expectEquals(14L, $opt$Shl(7L, -63L));  // -63 & 0x3f = 1
+    expectEquals(7L, $opt$Shl(7L, -64L));  // -64 & 0x3f = 0
+    expectEquals(2305843009213693952L, $opt$Shl(1L, -3L));  // -3 & 0x3f = 61
+
+    expectEquals(Long.MIN_VALUE, $opt$Shl(7L, Long.MAX_VALUE));
+    expectEquals(7L, $opt$Shl(7L, Long.MIN_VALUE));
+  }
+
+  private static void shrInt() {
+    expectEquals(3, $opt$ShrConst2(12));
+    expectEquals(12, $opt$ShrConst0(12));
+    expectEquals(-3, $opt$Shr(-12, 2));
+    expectEquals(1, $opt$Shr(32, 5));
+
+    expectEquals(7, $opt$Shr(7, 0));
+    expectEquals(3, $opt$Shr(7, 1));
+    expectEquals(0, $opt$Shr(0, 30));
+    expectEquals(0, $opt$Shr(1, 30));
+    expectEquals(-1, $opt$Shr(-1, 30));
+
+    expectEquals(0, $opt$Shr(Integer.MAX_VALUE, 31));
+    expectEquals(-1, $opt$Shr(Integer.MIN_VALUE, 31));
+
+    // Only the 5 lower bits should be used for shifting (& 0x1f).
+    expectEquals(7, $opt$Shr(7, 32));  // 32 & 0x1f = 0
+    expectEquals(3, $opt$Shr(7, 33));  // 33 & 0x1f = 1
+
+    expectEquals(0, $opt$Shr(1, -1));  // -1 & 0x1f = 31
+    expectEquals(3, $opt$Shr(7, -31));  // -31 & 0x1f = 1
+    expectEquals(7, $opt$Shr(7, -32));  // -32 & 0x1f = 0
+    expectEquals(-4, $opt$Shr(Integer.MIN_VALUE, -3));  // -3 & 0x1f = 29
+
+    expectEquals(0, $opt$Shr(7, Integer.MAX_VALUE));
+    expectEquals(7, $opt$Shr(7, Integer.MIN_VALUE));
+  }
+
+  private static void shrLong() {
+    expectEquals(3L, $opt$ShrConst2(12L));
+    expectEquals(12L, $opt$ShrConst0(12L));
+    expectEquals(-3L, $opt$Shr(-12L, 2L));
+    expectEquals(1L, $opt$Shr(32L, 5L));
+
+    expectEquals(7L, $opt$Shr(7L, 0L));
+    expectEquals(3L, $opt$Shr(7L, 1L));
+    expectEquals(0L, $opt$Shr(0L, 30L));
+    expectEquals(0L, $opt$Shr(1L, 30L));
+    expectEquals(-1L, $opt$Shr(-1L, 30L));
+
+
+    expectEquals(1L, $opt$Shr(1073741824L, 30L));
+    expectEquals(1L, $opt$Shr(2147483648L, 31L));
+    expectEquals(1073741824L, $opt$Shr(2147483648L, 1L));
+
+    // Long shifts can use up to 6 lower bits.
+    expectEquals(1L, $opt$Shr(4294967296L, 32L));
+    expectEquals(7L, $opt$Shr(60129542144L, 33L));
+    expectEquals(0L, $opt$Shr(Long.MAX_VALUE, 63L));
+    expectEquals(-1L, $opt$Shr(Long.MIN_VALUE, 63L));
+
+    // Only the 6 lower bits should be used for shifting (& 0x3f).
+    expectEquals(7L, $opt$Shr(7L, 64L));  // 64 & 0x3f = 0
+    expectEquals(3L, $opt$Shr(7L, 65L));  // 65 & 0x3f = 1
+
+    expectEquals(-1L, $opt$Shr(Long.MIN_VALUE, -1L));  // -1 & 0x3f = 63
+    expectEquals(3L, $opt$Shr(7L, -63L));  // -63 & 0x3f = 1
+    expectEquals(7L, $opt$Shr(7L, -64L));  // -64 & 0x3f = 0
+    expectEquals(1L, $opt$Shr(2305843009213693952L, -3L));  // -3 & 0x3f = 61
+    expectEquals(-4L, $opt$Shr(Integer.MIN_VALUE, -3));  // -3 & 0x1f = 29
+
+    expectEquals(0L, $opt$Shr(7L, Long.MAX_VALUE));
+    expectEquals(7L, $opt$Shr(7L, Long.MIN_VALUE));
+  }
+
+  private static void ushrInt() {
+    expectEquals(3, $opt$UShrConst2(12));
+    expectEquals(12, $opt$UShrConst0(12));
+    expectEquals(1073741821, $opt$UShr(-12, 2));
+    expectEquals(1, $opt$UShr(32, 5));
+
+    expectEquals(7, $opt$UShr(7, 0));
+    expectEquals(3, $opt$UShr(7, 1));
+    expectEquals(0, $opt$UShr(0, 30));
+    expectEquals(0, $opt$UShr(1, 30));
+    expectEquals(3, $opt$UShr(-1, 30));
+
+    expectEquals(0, $opt$UShr(Integer.MAX_VALUE, 31));
+    expectEquals(1, $opt$UShr(Integer.MIN_VALUE, 31));
+
+    // Only the 5 lower bits should be used for shifting (& 0x1f).
+    expectEquals(7, $opt$UShr(7, 32));  // 32 & 0x1f = 0
+    expectEquals(3, $opt$UShr(7, 33));  // 33 & 0x1f = 1
+
+    expectEquals(0, $opt$UShr(1, -1));  // -1 & 0x1f = 31
+    expectEquals(3, $opt$UShr(7, -31));  // -31 & 0x1f = 1
+    expectEquals(7, $opt$UShr(7, -32));  // -32 & 0x1f = 0
+    expectEquals(4, $opt$UShr(Integer.MIN_VALUE, -3));  // -3 & 0x1f = 29
+
+    expectEquals(0, $opt$UShr(7, Integer.MAX_VALUE));
+    expectEquals(7, $opt$UShr(7, Integer.MIN_VALUE));
+  }
+
+  private static void ushrLong() {
+    expectEquals(3L, $opt$UShrConst2(12L));
+    expectEquals(12L, $opt$UShrConst0(12L));
+    expectEquals(4611686018427387901L, $opt$UShr(-12L, 2L));
+    expectEquals(1L, $opt$UShr(32L, 5L));
+
+    expectEquals(7L, $opt$UShr(7L, 0L));
+    expectEquals(3L, $opt$UShr(7L, 1L));
+    expectEquals(0L, $opt$UShr(0L, 30L));
+    expectEquals(0L, $opt$UShr(1L, 30L));
+    expectEquals(17179869183L, $opt$UShr(-1L, 30L));
+
+
+    expectEquals(1L, $opt$UShr(1073741824L, 30L));
+    expectEquals(1L, $opt$UShr(2147483648L, 31L));
+    expectEquals(1073741824L, $opt$UShr(2147483648L, 1L));
+
+    // Long shifts can use up to 6 lower bits.
+    expectEquals(1L, $opt$UShr(4294967296L, 32L));
+    expectEquals(7L, $opt$UShr(60129542144L, 33L));
+    expectEquals(0L, $opt$UShr(Long.MAX_VALUE, 63L));
+    expectEquals(1L, $opt$UShr(Long.MIN_VALUE, 63L));
+
+    // Only the 6 lower bits should be used for shifting (& 0x3f).
+    expectEquals(7L, $opt$UShr(7L, 64L));  // 64 & 0x3f = 0
+    expectEquals(3L, $opt$UShr(7L, 65L));  // 65 & 0x3f = 1
+
+    expectEquals(1L, $opt$UShr(Long.MIN_VALUE, -1L));  // -1 & 0x3f = 63
+    expectEquals(3L, $opt$UShr(7L, -63L));  // -63 & 0x3f = 1
+    expectEquals(7L, $opt$UShr(7L, -64L));  // -64 & 0x3f = 0
+    expectEquals(1L, $opt$UShr(2305843009213693952L, -3L));  // -3 & 0x3f = 61
+    expectEquals(4L, $opt$UShr(Long.MIN_VALUE, -3L));  // -3 & 0x3f = 61
+
+    expectEquals(0L, $opt$UShr(7L, Long.MAX_VALUE));
+    expectEquals(7L, $opt$UShr(7L, Long.MIN_VALUE));
+  }
+
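+  // The $opt$-prefixed helpers below implement the shift operations exercised above.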
+  static int $opt$Shl(int a, int b) {
+    return a << b;
+  }
+
+  static long $opt$Shl(long a, long b) {
+    return a << b;
+  }
+
+  static int $opt$Shr(int a, int b) {
+    return a >> b;
+  }
+
+  static long $opt$Shr(long a, long b) {
+    return a >> b;
+  }
+
+  static int $opt$UShr(int a, int b) {
+    return a >>> b;
+  }
+
+  static long $opt$UShr(long a, long b) {
+    return a >>> b;
+  }
+
+  static int $opt$ShlConst2(int a) {
+    return a << 2;
+  }
+
+  static long $opt$ShlConst2(long a) {
+    return a << 2L;
+  }
+
+  static int $opt$ShrConst2(int a) {
+    return a >> 2;
+  }
+
+  static long $opt$ShrConst2(long a) {
+    return a >> 2L;
+  }
+
+  static int $opt$UShrConst2(int a) {
+    return a >>> 2;
+  }
+
+  static long $opt$UShrConst2(long a) {
+    return a >>> 2L;
+  }
+
+  static int $opt$ShlConst0(int a) {
+    return a << 0;
+  }
+
+  static long $opt$ShlConst0(long a) {
+    return a << 0L;
+  }
+
+  static int $opt$ShrConst0(int a) {
+    return a >> 0;
+  }
+
+  static long $opt$ShrConst0(long a) {
+    return a >> 0L;
+  }
+
+  static int $opt$UShrConst0(int a) {
+    return a >>> 0;
+  }
+
+  static long $opt$UShrConst0(long a) {
+    return a >>> 0L;
+  }
+
+}
+
diff --git a/test/801-VoidCheckCast/classes.dex b/test/801-VoidCheckCast/classes.dex
new file mode 100644
index 0000000..e6f0f02
--- /dev/null
+++ b/test/801-VoidCheckCast/classes.dex
Binary files differ
diff --git a/test/801-VoidCheckCast/expected.txt b/test/801-VoidCheckCast/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/801-VoidCheckCast/expected.txt
diff --git a/test/801-VoidCheckCast/info.txt b/test/801-VoidCheckCast/info.txt
new file mode 100644
index 0000000..422f740
--- /dev/null
+++ b/test/801-VoidCheckCast/info.txt
@@ -0,0 +1,4 @@
+A test that is only available as a DEX binary.
+
+This tests that an attempt to use check-cast with the void type doesn't
+cause the compiler to crash.
diff --git a/test/etc/default-build b/test/etc/default-build
index ab859ec..6731ad3 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -17,6 +17,11 @@
 # Stop if something fails.
 set -e
 
+if [ -e classes.dex ]; then
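+  # A prebuilt classes.dex ships with this test (e.g. a DEX-only test); package it and skip compilation.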
+  zip $TEST_NAME.jar classes.dex
+  exit 0
+fi
+
 mkdir classes
 ${JAVAC} -d classes `find src -name '*.java'`
 
diff --git a/test/run-test b/test/run-test
index 843714b..e9dd86a 100755
--- a/test/run-test
+++ b/test/run-test
@@ -586,7 +586,7 @@
         echo '#################### info'
         cat "${td_info}" | sed 's/^/# /g'
         echo '#################### diffs'
-        diff --strip-trailing-cr -u "$expected" "$output" | tail -n 500
+        diff --strip-trailing-cr -u "$expected" "$output" | tail -n 2000
         echo '####################'
         echo ' '
     fi