Compiler: Take advantage of constant propagation

The common frontend tracks constants via a constant propagation pass.
When converting from MIR to GBC (for Portable) or LIR (for Quick),
recognize constant arguments and select more efficient codegen forms.

Note: we still have to flush constants to their associated vregs to
support deoptimization.  There's quite a bit of possible code size
gain if we were to eliminate unnecessary stores or enhance the vmap
table to explicitly represent the ranges over which Dalvik vregs
are constant.

Also some minor code refactoring related to array operations.  There
are sufficient architectural differences to make it worthwhile to
have target-dependent aget/aput generators.  On Arm, this is mostly
beneficial to floating point array loads and stores.

This CL yields a ~0.8% decrease in code size over the framework,
and a noticeable improvement on a few of the standard point benchmarks
(linpack: ~10%, cm: ~11%, scimark: ~13%; no significant change to
the others).

Change-Id: I2337e1aa0622b34a34c3775f8b7dbf5e6969da3e
diff --git a/src/compiler/codegen/arm/codegen_arm.h b/src/compiler/codegen/arm/codegen_arm.h
index f085a19..ca39e5a 100644
--- a/src/compiler/codegen/arm/codegen_arm.h
+++ b/src/compiler/codegen/arm/codegen_arm.h
@@ -39,7 +39,6 @@
     virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value);
     virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
                                        int val_lo, int val_hi);
-    virtual void LoadPair(CompilationUnit* cu, int base, int low_reg, int high_reg);
     virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
                                OpSize size);
     virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -90,6 +89,12 @@
     virtual bool IsUnconditionalBranch(LIR* lir);
 
     // Required for target - Dalvik-level generators.
+    virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
+                                RegLocation rl_index, RegLocation rl_src, int scale);
+    virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_dest, int scale);
+    virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_src, int scale);
     virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2);
     virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
@@ -191,6 +196,7 @@
     static int EncodeShift(int code, int amount);
     static int ModifiedImmediate(uint32_t value);
     static ArmConditionCode ArmConditionEncoding(ConditionCode code);
+    bool InexpensiveConstant(int reg, int value);
 };
 
 }  // namespace art
diff --git a/src/compiler/codegen/arm/int_arm.cc b/src/compiler/codegen/arm/int_arm.cc
index 0a6abd2..e86f379 100644
--- a/src/compiler/codegen/arm/int_arm.cc
+++ b/src/compiler/codegen/arm/int_arm.cc
@@ -558,4 +558,204 @@
   return false;
 }
 
+/*
+ * Generate array load: null check, optional bounds check, then element load into rl_dest.
+ */
+void ArmCodegen::GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                          RegLocation rl_index, RegLocation rl_dest, int scale)
+{
+  RegisterClass reg_class = oat_reg_class_by_size(size);
+  int len_offset = Array::LengthOffset().Int32Value();
+  int data_offset;
+  RegLocation rl_result;
+  rl_array = LoadValue(cu, rl_array, kCoreReg);
+  rl_index = LoadValue(cu, rl_index, kCoreReg);
+
+  if (rl_dest.wide) {
+    data_offset = Array::DataOffset(sizeof(int64_t)).Int32Value();
+  } else {
+    data_offset = Array::DataOffset(sizeof(int32_t)).Int32Value();
+  }
+
+  /* Null check on the array reference */
+  GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
+
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  int reg_len = INVALID_REG;
+  if (needs_range_check) {
+    reg_len = AllocTemp(cu);
+    /* Load the array length for the range check below */
+    LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len);
+  }
+  if (rl_dest.wide || rl_dest.fp) {
+    // Wide or fp dest: no special indexed operation, lea + load w/ displacement
+    int reg_ptr = AllocTemp(cu);
+    OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
+                     EncodeShift(kArmLsl, scale));
+    FreeTemp(cu, rl_index.low_reg);  // still read by the range check below
+    rl_result = EvalLoc(cu, rl_dest, reg_class, true);
+
+    if (needs_range_check) {
+      // TODO: change kCondCS to a more meaningful name, is the sense of
+      // carry-set/clear flipped?
+      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      FreeTemp(cu, reg_len);
+    }
+    if (rl_dest.wide) {
+      LoadBaseDispWide(cu, reg_ptr, data_offset, rl_result.low_reg, rl_result.high_reg, INVALID_SREG);
+      FreeTemp(cu, reg_ptr);
+      StoreValueWide(cu, rl_dest, rl_result);
+    } else {
+      LoadBaseDisp(cu, reg_ptr, data_offset, rl_result.low_reg, size, INVALID_SREG);
+      FreeTemp(cu, reg_ptr);
+      StoreValue(cu, rl_dest, rl_result);
+    }
+  } else {
+    // Non-wide, non-fp dest: offset base to the array data, then use indexed load
+    int reg_ptr = AllocTemp(cu);
+    OpRegRegImm(cu, kOpAdd, reg_ptr, rl_array.low_reg, data_offset);
+    FreeTemp(cu, rl_array.low_reg);  // length (if needed) was already loaded above
+    rl_result = EvalLoc(cu, rl_dest, reg_class, true);
+
+    if (needs_range_check) {
+      // TODO: change kCondCS to a more meaningful name, is the sense of
+      // carry-set/clear flipped?
+      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      FreeTemp(cu, reg_len);
+    }
+    LoadBaseIndexed(cu, reg_ptr, rl_index.low_reg, rl_result.low_reg, scale, size);
+    FreeTemp(cu, reg_ptr);
+    StoreValue(cu, rl_dest, rl_result);
+  }
+}
+
+/*
+ * Generate array store: null check, optional bounds check, then store rl_src to the element.
+ * reg_ptr reuses the array's register when that register is a temp; otherwise a new temp is used.
+ */
+void ArmCodegen::GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                          RegLocation rl_index, RegLocation rl_src, int scale)
+{
+  RegisterClass reg_class = oat_reg_class_by_size(size);
+  int len_offset = Array::LengthOffset().Int32Value();
+  int data_offset;
+
+  if (size == kLong || size == kDouble) {
+    data_offset = Array::DataOffset(sizeof(int64_t)).Int32Value();
+  } else {
+    data_offset = Array::DataOffset(sizeof(int32_t)).Int32Value();
+  }
+
+  rl_array = LoadValue(cu, rl_array, kCoreReg);
+  rl_index = LoadValue(cu, rl_index, kCoreReg);
+  int reg_ptr = INVALID_REG;
+  if (IsTemp(cu, rl_array.low_reg)) {
+    Clobber(cu, rl_array.low_reg);
+    reg_ptr = rl_array.low_reg;
+  } else {
+    reg_ptr = AllocTemp(cu);
+  }
+
+  /* Null check on the array reference */
+  GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
+
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  int reg_len = INVALID_REG;
+  if (needs_range_check) {
+    reg_len = AllocTemp(cu);
+    //NOTE: max live temps(4) here.
+    /* Load the array length for the range check below */
+    LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len);
+  }
+  /* reg_ptr is only reserved here; its address value is computed in each branch below */
+  if (rl_src.wide || rl_src.fp) {
+    if (rl_src.wide) {
+      rl_src = LoadValueWide(cu, rl_src, reg_class);
+    } else {
+      rl_src = LoadValue(cu, rl_src, reg_class);
+    }
+    OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
+                     EncodeShift(kArmLsl, scale));
+    if (needs_range_check) {
+      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      FreeTemp(cu, reg_len);
+    }
+    if (rl_src.wide) {
+      StoreBaseDispWide(cu, reg_ptr, data_offset, rl_src.low_reg, rl_src.high_reg);
+    } else {
+      StoreBaseDisp(cu, reg_ptr, data_offset, rl_src.low_reg, size);
+    }
+  } else {
+    /* reg_ptr -> array data (array base + data_offset) */
+    OpRegRegImm(cu, kOpAdd, reg_ptr, rl_array.low_reg, data_offset);
+    rl_src = LoadValue(cu, rl_src, reg_class);
+    if (needs_range_check) {
+      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      FreeTemp(cu, reg_len);
+    }
+    StoreBaseIndexed(cu, reg_ptr, rl_index.low_reg, rl_src.low_reg,
+                     scale, size);
+  }
+  FreeTemp(cu, reg_ptr);
+}
+
+/*
+ * Generate array store of an object reference.  A non-null value is first passed, with the
+ * array's class, to pCanPutArrayElementFromCode; the store is followed by MarkGCCard.
+ */
+void ArmCodegen::GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_src, int scale)
+{
+  int len_offset = Array::LengthOffset().Int32Value();
+  int data_offset = Array::DataOffset(sizeof(Object*)).Int32Value();
+
+  FlushAllRegs(cu);  // Use explicit registers
+  LockCallTemps(cu);
+
+  int r_value = TargetReg(kArg0);  // Register holding value
+  int r_array_class = TargetReg(kArg1);  // Register holding array's Class
+  int r_array = TargetReg(kArg2);  // Register holding array
+  int r_index = TargetReg(kArg3);  // Register holding index into array
+
+  LoadValueDirectFixed(cu, rl_array, r_array);  // Grab array
+  LoadValueDirectFixed(cu, rl_src, r_value);  // Grab value
+  LoadValueDirectFixed(cu, rl_index, r_index);  // Grab index
+
+  GenNullCheck(cu, rl_array.s_reg_low, r_array, opt_flags);  // NPE?
+
+  // Store of null?  If so, branch around the helper call below.
+  LIR* null_value_check = OpCmpImmBranch(cu, kCondEq, r_value, 0, NULL);
+
+  // Get the array's class.
+  LoadWordDisp(cu, r_array, Object::ClassOffset().Int32Value(), r_array_class);
+  CallRuntimeHelperRegReg(cu, ENTRYPOINT_OFFSET(pCanPutArrayElementFromCode), r_value,
+                          r_array_class, true);
+  // Redo LoadValues in case they didn't survive the call.
+  LoadValueDirectFixed(cu, rl_array, r_array);  // Reload array
+  LoadValueDirectFixed(cu, rl_index, r_index);  // Reload index
+  LoadValueDirectFixed(cu, rl_src, r_value);  // Reload value
+  r_array_class = INVALID_REG;  // kArg1 is reused for the length below
+
+  // Branch here if value to be stored == null
+  LIR* target = NewLIR0(cu, kPseudoTargetLabel);
+  null_value_check->target = target;
+
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  int reg_len = INVALID_REG;
+  if (needs_range_check) {
+    reg_len = TargetReg(kArg1);
+    LoadWordDisp(cu, r_array, len_offset, reg_len);  // Get len
+  }
+  /* r_ptr -> array data (array base + data_offset) */
+  int r_ptr = AllocTemp(cu);
+  OpRegRegImm(cu, kOpAdd, r_ptr, r_array, data_offset);
+  if (needs_range_check) {
+    GenRegRegCheck(cu, kCondCs, r_index, reg_len, kThrowArrayBounds);
+  }
+  StoreBaseIndexed(cu, r_ptr, r_index, r_value, scale, kWord);
+  FreeTemp(cu, r_ptr);
+  FreeTemp(cu, r_index);
+  MarkGCCard(cu, r_value, r_array);
+}
+
 }  // namespace art
diff --git a/src/compiler/codegen/arm/utility_arm.cc b/src/compiler/codegen/arm/utility_arm.cc
index 7f37bea..5c25eee 100644
--- a/src/compiler/codegen/arm/utility_arm.cc
+++ b/src/compiler/codegen/arm/utility_arm.cc
@@ -126,6 +126,21 @@
    return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */
 }
 
+bool ArmCodegen::InexpensiveConstant(int reg, int value)  // is value cheap to load into reg?
+{
+  bool res = false;
+  if (ARM_FPREG(reg)) {  // fp reg: value must be encodable by EncodeImmSingle
+    res = (EncodeImmSingle(value) >= 0);
+  } else {
+    if (ARM_LOWREG(reg) && (value >= 0) && (IsUint(8, value))) {  // low reg, 8-bit unsigned
+      res = true;
+    } else {  // otherwise: value or ~value must have a modified-immediate encoding
+      res = (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
+    }
+  }
+  return res;
+}
+
 /*
  * Load a immediate using a shortcut if possible; otherwise
  * grab from the per-translation literal pool.
@@ -1011,11 +1026,6 @@
   return StoreBaseDispBody(cu, rBase, displacement, r_src_lo, r_src_hi, kLong);
 }
 
-void ArmCodegen::LoadPair(CompilationUnit* cu, int base, int low_reg, int high_reg)
-{
-  LoadBaseDispWide(cu, base, 0, low_reg, high_reg, INVALID_SREG);
-}
-
 LIR* ArmCodegen::OpFpRegCopy(CompilationUnit* cu, int r_dest, int r_src)
 {
   int opcode;
diff --git a/src/compiler/codegen/codegen.h b/src/compiler/codegen/codegen.h
index e512803..7a85ce8 100644
--- a/src/compiler/codegen/codegen.h
+++ b/src/compiler/codegen/codegen.h
@@ -135,12 +135,6 @@
     void GenInstanceof(CompilationUnit* cu, uint32_t type_idx, RegLocation rl_dest,
                        RegLocation rl_src);
     void GenCheckCast(CompilationUnit* cu, uint32_t type_idx, RegLocation rl_src);
-    void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
-                        RegLocation rl_index, RegLocation rl_src, int scale);
-    void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
-                     RegLocation rl_index, RegLocation rl_dest, int scale);
-    void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
-                     RegLocation rl_index, RegLocation rl_src, int scale);
     void GenLong3Addr(CompilationUnit* cu, OpKind first_op, OpKind second_op, RegLocation rl_dest,
                       RegLocation rl_src1, RegLocation rl_src2);
     bool GenShiftOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest,
@@ -245,7 +239,6 @@
     virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value) = 0;
     virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
                                        int val_lo, int val_hi) = 0;
-    virtual void LoadPair(CompilationUnit* cu, int base, int low_reg, int high_reg) = 0;
     virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
                                OpSize size) = 0;
     virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -350,6 +343,12 @@
                                  RegLocation rl_src) = 0;
     virtual void GenSpecialCase(CompilationUnit* cu, BasicBlock* bb, MIR* mir,
                                 SpecialCaseHandler special_case) = 0;
+    virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
+                                RegLocation rl_index, RegLocation rl_src, int scale) = 0;
+    virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_dest, int scale) = 0;
+    virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                     RegLocation rl_index, RegLocation rl_src, int scale) = 0;
 
     // Required for target - single operation generators.
     virtual LIR* OpUnconditionalBranch(CompilationUnit* cu, LIR* target) = 0;
@@ -382,6 +381,7 @@
     virtual void OpRegCopyWide(CompilationUnit* cu, int dest_lo, int dest_hi, int src_lo,
                                int src_hi) = 0;
     virtual void OpTlsCmp(CompilationUnit* cu, int offset, int val) = 0;
+    virtual bool InexpensiveConstant(int reg, int value) = 0;
 
     // Temp workaround
     void Workaround7250540(CompilationUnit* cu, RegLocation rl_dest, int value);
diff --git a/src/compiler/codegen/codegen_util.cc b/src/compiler/codegen/codegen_util.cc
index bab5cd9..77a2269 100644
--- a/src/compiler/codegen/codegen_util.cc
+++ b/src/compiler/codegen/codegen_util.cc
@@ -1074,4 +1074,27 @@
   return res;
 }
 
+bool EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2)  // true if taken
+{  // IF_xx forms compare src1 with src2; IF_xxZ forms compare src1 with 0 and ignore src2
+  bool is_taken;
+  switch (opcode) {
+    case Instruction::IF_EQ: is_taken = (src1 == src2); break;
+    case Instruction::IF_NE: is_taken = (src1 != src2); break;
+    case Instruction::IF_LT: is_taken = (src1 < src2); break;
+    case Instruction::IF_GE: is_taken = (src1 >= src2); break;
+    case Instruction::IF_GT: is_taken = (src1 > src2); break;
+    case Instruction::IF_LE: is_taken = (src1 <= src2); break;
+    case Instruction::IF_EQZ: is_taken = (src1 == 0); break;
+    case Instruction::IF_NEZ: is_taken = (src1 != 0); break;
+    case Instruction::IF_LTZ: is_taken = (src1 < 0); break;
+    case Instruction::IF_GEZ: is_taken = (src1 >= 0); break;
+    case Instruction::IF_GTZ: is_taken = (src1 > 0); break;
+    case Instruction::IF_LEZ: is_taken = (src1 <= 0); break;
+    default:  // any opcode other than the twelve IF_* forms above is reported as fatal
+      LOG(FATAL) << "Unexpected opcode " << opcode;
+      is_taken = false;
+  }
+  return is_taken;
+}
+
 } // namespace art
diff --git a/src/compiler/codegen/codegen_util.h b/src/compiler/codegen/codegen_util.h
index 6a9b6cd..3bb4291 100644
--- a/src/compiler/codegen/codegen_util.h
+++ b/src/compiler/codegen/codegen_util.h
@@ -51,6 +51,7 @@
 void DumpPackedSwitchTable(const uint16_t* table);
 LIR* MarkBoundary(CompilationUnit* cu, int offset, const char* inst_str);
 void NopLIR(LIR* lir);
+bool EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2);  // true if taken
 
 }  // namespace art
 
diff --git a/src/compiler/codegen/gen_common.cc b/src/compiler/codegen/gen_common.cc
index db99a30..275aee5 100644
--- a/src/compiler/codegen/gen_common.cc
+++ b/src/compiler/codegen/gen_common.cc
@@ -89,13 +89,28 @@
   return branch;
 }
 
+// Map a condition on (src1, src2) to the equivalent condition on (src2, src1); Eq/Ne unchanged
+ConditionCode FlipComparisonOrder(ConditionCode before) {
+  ConditionCode res;
+  switch (before) {
+    case kCondEq: res = kCondEq; break;
+    case kCondNe: res = kCondNe; break;
+    case kCondLt: res = kCondGt; break;
+    case kCondGt: res = kCondLt; break;
+    case kCondLe: res = kCondGe; break;
+    case kCondGe: res = kCondLe; break;
+    default:  // only the six codes above are handled; anything else is reported as fatal
+      res = static_cast<ConditionCode>(0);
+      LOG(FATAL) << "Unexpected ccode " << before;
+  }
+  return res;
+}
+
 void Codegen::GenCompareAndBranch(CompilationUnit* cu, Instruction::Code opcode,
                                   RegLocation rl_src1, RegLocation rl_src2, LIR* taken,
                                   LIR* fall_through)
 {
   ConditionCode cond;
-  rl_src1 = LoadValue(cu, rl_src1, kCoreReg);
-  rl_src2 = LoadValue(cu, rl_src2, kCoreReg);
   switch (opcode) {
     case Instruction::IF_EQ:
       cond = kCondEq;
@@ -119,6 +134,29 @@
       cond = static_cast<ConditionCode>(0);
       LOG(FATAL) << "Unexpected opcode " << opcode;
   }
+
+  // Normalize such that if either operand is constant, src2 will be constant
+  if (rl_src1.is_const) {
+    RegLocation rl_temp = rl_src1;
+    rl_src1 = rl_src2;
+    rl_src2 = rl_temp;
+    cond = FlipComparisonOrder(cond);
+  }
+
+  rl_src1 = LoadValue(cu, rl_src1, kCoreReg);
+  // Is this really an immediate comparison?
+  if (rl_src2.is_const) {
+    int immval = cu->constant_values[rl_src2.orig_sreg];
+    // If it's already live in a register or not easily materialized, just keep going
+    RegLocation rl_temp = UpdateLoc(cu, rl_src2);
+    if ((rl_temp.location == kLocDalvikFrame) && InexpensiveConstant(rl_src1.low_reg, immval)) {
+      // OK - convert this to a compare immediate and branch
+      OpCmpImmBranch(cu, cond, rl_src1.low_reg, immval, taken);
+      OpUnconditionalBranch(cu, fall_through);
+      return;
+    }
+  }
+  rl_src2 = LoadValue(cu, rl_src2, kCoreReg);
   OpCmpBranch(cu, cond, rl_src1.low_reg, rl_src2.low_reg, taken);
   OpUnconditionalBranch(cu, fall_through);
 }
@@ -151,12 +189,7 @@
       cond = static_cast<ConditionCode>(0);
       LOG(FATAL) << "Unexpected opcode " << opcode;
   }
-  if (cu->instruction_set == kThumb2) {
-    OpRegImm(cu, kOpCmp, rl_src.low_reg, 0);
-    OpCondBranch(cu, cond, taken);
-  } else {
-    OpCmpImmBranch(cu, cond, rl_src.low_reg, 0, taken);
-  }
+  OpCmpImmBranch(cu, cond, rl_src.low_reg, 0, taken);
   OpUnconditionalBranch(cu, fall_through);
 }
 
@@ -668,7 +701,7 @@
         int reg_ptr = AllocTemp(cu);
         OpRegRegImm(cu, kOpAdd, reg_ptr, rl_obj.low_reg, field_offset);
         rl_result = EvalLoc(cu, rl_dest, reg_class, true);
-        LoadPair(cu, reg_ptr, rl_result.low_reg, rl_result.high_reg);
+        LoadBaseDispWide(cu, reg_ptr, 0, rl_result.low_reg, rl_result.high_reg, INVALID_SREG);
         if (is_volatile) {
           GenMemBarrier(cu, kLoadLoad);
         }
@@ -1056,270 +1089,6 @@
   branch2->target = target;
 }
 
-/*
- * Generate array store
- *
- */
-void Codegen::GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale)
-{
-  int len_offset = Array::LengthOffset().Int32Value();
-  int data_offset = Array::DataOffset(sizeof(Object*)).Int32Value();
-
-  FlushAllRegs(cu);  // Use explicit registers
-  LockCallTemps(cu);
-
-  int r_value = TargetReg(kArg0);  // Register holding value
-  int r_array_class = TargetReg(kArg1);  // Register holding array's Class
-  int r_array = TargetReg(kArg2);  // Register holding array
-  int r_index = TargetReg(kArg3);  // Register holding index into array
-
-  LoadValueDirectFixed(cu, rl_array, r_array);  // Grab array
-  LoadValueDirectFixed(cu, rl_src, r_value);  // Grab value
-  LoadValueDirectFixed(cu, rl_index, r_index);  // Grab index
-
-  GenNullCheck(cu, rl_array.s_reg_low, r_array, opt_flags);  // NPE?
-
-  // Store of null?
-  LIR* null_value_check = OpCmpImmBranch(cu, kCondEq, r_value, 0, NULL);
-
-  // Get the array's class.
-  LoadWordDisp(cu, r_array, Object::ClassOffset().Int32Value(), r_array_class);
-  CallRuntimeHelperRegReg(cu, ENTRYPOINT_OFFSET(pCanPutArrayElementFromCode), r_value,
-                          r_array_class, true);
-  // Redo LoadValues in case they didn't survive the call.
-  LoadValueDirectFixed(cu, rl_array, r_array);  // Reload array
-  LoadValueDirectFixed(cu, rl_index, r_index);  // Reload index
-  LoadValueDirectFixed(cu, rl_src, r_value);  // Reload value
-  r_array_class = INVALID_REG;
-
-  // Branch here if value to be stored == null
-  LIR* target = NewLIR0(cu, kPseudoTargetLabel);
-  null_value_check->target = target;
-
-  if (cu->instruction_set == kX86) {
-    // make an extra temp available for card mark below
-    FreeTemp(cu, TargetReg(kArg1));
-    if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
-      /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */
-      GenRegMemCheck(cu, kCondUge, r_index, r_array, len_offset, kThrowArrayBounds);
-    }
-    StoreBaseIndexedDisp(cu, r_array, r_index, scale,
-                         data_offset, r_value, INVALID_REG, kWord, INVALID_SREG);
-  } else {
-    bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
-    int reg_len = INVALID_REG;
-    if (needs_range_check) {
-      reg_len = TargetReg(kArg1);
-      LoadWordDisp(cu, r_array, len_offset, reg_len);  // Get len
-    }
-    /* r_ptr -> array data */
-    int r_ptr = AllocTemp(cu);
-    OpRegRegImm(cu, kOpAdd, r_ptr, r_array, data_offset);
-    if (needs_range_check) {
-      GenRegRegCheck(cu, kCondCs, r_index, reg_len, kThrowArrayBounds);
-    }
-    StoreBaseIndexed(cu, r_ptr, r_index, r_value, scale, kWord);
-    FreeTemp(cu, r_ptr);
-  }
-  FreeTemp(cu, r_index);
-  MarkGCCard(cu, r_value, r_array);
-}
-
-/*
- * Generate array load
- */
-void Codegen::GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_dest, int scale)
-{
-  RegisterClass reg_class = oat_reg_class_by_size(size);
-  int len_offset = Array::LengthOffset().Int32Value();
-  int data_offset;
-  RegLocation rl_result;
-  rl_array = LoadValue(cu, rl_array, kCoreReg);
-  rl_index = LoadValue(cu, rl_index, kCoreReg);
-
-  if (size == kLong || size == kDouble) {
-    data_offset = Array::DataOffset(sizeof(int64_t)).Int32Value();
-  } else {
-    data_offset = Array::DataOffset(sizeof(int32_t)).Int32Value();
-  }
-
-  /* null object? */
-  GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
-
-  if (cu->instruction_set == kX86) {
-    if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
-      /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */
-      GenRegMemCheck(cu, kCondUge, rl_index.low_reg, rl_array.low_reg,
-                     len_offset, kThrowArrayBounds);
-    }
-    if ((size == kLong) || (size == kDouble)) {
-      int reg_addr = AllocTemp(cu);
-      OpLea(cu, reg_addr, rl_array.low_reg, rl_index.low_reg, scale, data_offset);
-      FreeTemp(cu, rl_array.low_reg);
-      FreeTemp(cu, rl_index.low_reg);
-      rl_result = EvalLoc(cu, rl_dest, reg_class, true);
-      LoadBaseIndexedDisp(cu, reg_addr, INVALID_REG, 0, 0, rl_result.low_reg,
-                          rl_result.high_reg, size, INVALID_SREG);
-      StoreValueWide(cu, rl_dest, rl_result);
-    } else {
-      rl_result = EvalLoc(cu, rl_dest, reg_class, true);
-
-      LoadBaseIndexedDisp(cu, rl_array.low_reg, rl_index.low_reg, scale,
-                          data_offset, rl_result.low_reg, INVALID_REG, size,
-                          INVALID_SREG);
-
-      StoreValue(cu, rl_dest, rl_result);
-    }
-  } else {
-    int reg_ptr = AllocTemp(cu);
-    bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
-    int reg_len = INVALID_REG;
-    if (needs_range_check) {
-      reg_len = AllocTemp(cu);
-      /* Get len */
-      LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len);
-    }
-    /* reg_ptr -> array data */
-    OpRegRegImm(cu, kOpAdd, reg_ptr, rl_array.low_reg, data_offset);
-    FreeTemp(cu, rl_array.low_reg);
-    if ((size == kLong) || (size == kDouble)) {
-      if (scale) {
-        int r_new_index = AllocTemp(cu);
-        OpRegRegImm(cu, kOpLsl, r_new_index, rl_index.low_reg, scale);
-        OpRegReg(cu, kOpAdd, reg_ptr, r_new_index);
-        FreeTemp(cu, r_new_index);
-      } else {
-        OpRegReg(cu, kOpAdd, reg_ptr, rl_index.low_reg);
-      }
-      FreeTemp(cu, rl_index.low_reg);
-      rl_result = EvalLoc(cu, rl_dest, reg_class, true);
-
-      if (needs_range_check) {
-        // TODO: change kCondCS to a more meaningful name, is the sense of
-        // carry-set/clear flipped?
-        GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
-        FreeTemp(cu, reg_len);
-      }
-      LoadPair(cu, reg_ptr, rl_result.low_reg, rl_result.high_reg);
-
-      FreeTemp(cu, reg_ptr);
-      StoreValueWide(cu, rl_dest, rl_result);
-    } else {
-      rl_result = EvalLoc(cu, rl_dest, reg_class, true);
-
-      if (needs_range_check) {
-        // TODO: change kCondCS to a more meaningful name, is the sense of
-        // carry-set/clear flipped?
-        GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
-        FreeTemp(cu, reg_len);
-      }
-      LoadBaseIndexed(cu, reg_ptr, rl_index.low_reg, rl_result.low_reg, scale, size);
-
-      FreeTemp(cu, reg_ptr);
-      StoreValue(cu, rl_dest, rl_result);
-    }
-  }
-}
-
-/*
- * Generate array store
- *
- */
-void Codegen::GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_src, int scale)
-{
-  RegisterClass reg_class = oat_reg_class_by_size(size);
-  int len_offset = Array::LengthOffset().Int32Value();
-  int data_offset;
-
-  if (size == kLong || size == kDouble) {
-    data_offset = Array::DataOffset(sizeof(int64_t)).Int32Value();
-  } else {
-    data_offset = Array::DataOffset(sizeof(int32_t)).Int32Value();
-  }
-
-  rl_array = LoadValue(cu, rl_array, kCoreReg);
-  rl_index = LoadValue(cu, rl_index, kCoreReg);
-  int reg_ptr = INVALID_REG;
-  if (cu->instruction_set != kX86) {
-    if (IsTemp(cu, rl_array.low_reg)) {
-      Clobber(cu, rl_array.low_reg);
-      reg_ptr = rl_array.low_reg;
-    } else {
-      reg_ptr = AllocTemp(cu);
-      OpRegCopy(cu, reg_ptr, rl_array.low_reg);
-    }
-  }
-
-  /* null object? */
-  GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
-
-  if (cu->instruction_set == kX86) {
-    if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
-      /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */
-      GenRegMemCheck(cu, kCondUge, rl_index.low_reg, rl_array.low_reg, len_offset, kThrowArrayBounds);
-    }
-    if ((size == kLong) || (size == kDouble)) {
-      rl_src = LoadValueWide(cu, rl_src, reg_class);
-    } else {
-      rl_src = LoadValue(cu, rl_src, reg_class);
-    }
-    // If the src reg can't be byte accessed, move it to a temp first.
-    if ((size == kSignedByte || size == kUnsignedByte) && rl_src.low_reg >= 4) {
-      int temp = AllocTemp(cu);
-      OpRegCopy(cu, temp, rl_src.low_reg);
-      StoreBaseIndexedDisp(cu, rl_array.low_reg, rl_index.low_reg, scale, data_offset, temp,
-                           INVALID_REG, size, INVALID_SREG);
-    } else {
-      StoreBaseIndexedDisp(cu, rl_array.low_reg, rl_index.low_reg, scale, data_offset, rl_src.low_reg,
-                           rl_src.high_reg, size, INVALID_SREG);
-    }
-  } else {
-    bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
-    int reg_len = INVALID_REG;
-    if (needs_range_check) {
-      reg_len = AllocTemp(cu);
-      //NOTE: max live temps(4) here.
-      /* Get len */
-      LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len);
-    }
-    /* reg_ptr -> array data */
-    OpRegImm(cu, kOpAdd, reg_ptr, data_offset);
-    /* at this point, reg_ptr points to array, 2 live temps */
-    if ((size == kLong) || (size == kDouble)) {
-      //TUNING: specific wide routine that can handle fp regs
-      if (scale) {
-        int r_new_index = AllocTemp(cu);
-        OpRegRegImm(cu, kOpLsl, r_new_index, rl_index.low_reg, scale);
-        OpRegReg(cu, kOpAdd, reg_ptr, r_new_index);
-        FreeTemp(cu, r_new_index);
-      } else {
-        OpRegReg(cu, kOpAdd, reg_ptr, rl_index.low_reg);
-      }
-      rl_src = LoadValueWide(cu, rl_src, reg_class);
-
-      if (needs_range_check) {
-        GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
-        FreeTemp(cu, reg_len);
-      }
-
-      StoreBaseDispWide(cu, reg_ptr, 0, rl_src.low_reg, rl_src.high_reg);
-
-      FreeTemp(cu, reg_ptr);
-    } else {
-      rl_src = LoadValue(cu, rl_src, reg_class);
-      if (needs_range_check) {
-        GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
-        FreeTemp(cu, reg_len);
-      }
-      StoreBaseIndexed(cu, reg_ptr, rl_index.low_reg, rl_src.low_reg,
-                       scale, size);
-    }
-  }
-}
-
 void Codegen::GenLong3Addr(CompilationUnit* cu, OpKind first_op, OpKind second_op,
                            RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2)
 {
@@ -1683,10 +1452,18 @@
       break;
     }
 
+    case Instruction::SUB_INT:
+    case Instruction::SUB_INT_2ADDR:
+      lit = -lit;
+      // Intended fallthrough
+    case Instruction::ADD_INT:
+    case Instruction::ADD_INT_2ADDR:
     case Instruction::ADD_INT_LIT8:
     case Instruction::ADD_INT_LIT16:
       op = kOpAdd;
       break;
+    case Instruction::MUL_INT:
+    case Instruction::MUL_INT_2ADDR:
     case Instruction::MUL_INT_LIT8:
     case Instruction::MUL_INT_LIT16: {
       if (HandleEasyMultiply(cu, rl_src, rl_dest, lit)) {
@@ -1695,39 +1472,52 @@
       op = kOpMul;
       break;
     }
+    case Instruction::AND_INT:
+    case Instruction::AND_INT_2ADDR:
     case Instruction::AND_INT_LIT8:
     case Instruction::AND_INT_LIT16:
       op = kOpAnd;
       break;
+    case Instruction::OR_INT:
+    case Instruction::OR_INT_2ADDR:
     case Instruction::OR_INT_LIT8:
     case Instruction::OR_INT_LIT16:
       op = kOpOr;
       break;
+    case Instruction::XOR_INT:
+    case Instruction::XOR_INT_2ADDR:
     case Instruction::XOR_INT_LIT8:
     case Instruction::XOR_INT_LIT16:
       op = kOpXor;
       break;
     case Instruction::SHL_INT_LIT8:
     case Instruction::SHL_INT:
+    case Instruction::SHL_INT_2ADDR:
       lit &= 31;
       shift_op = true;
       op = kOpLsl;
       break;
     case Instruction::SHR_INT_LIT8:
     case Instruction::SHR_INT:
+    case Instruction::SHR_INT_2ADDR:
       lit &= 31;
       shift_op = true;
       op = kOpAsr;
       break;
     case Instruction::USHR_INT_LIT8:
     case Instruction::USHR_INT:
+    case Instruction::USHR_INT_2ADDR:
       lit &= 31;
       shift_op = true;
       op = kOpLsr;
       break;
 
+    case Instruction::DIV_INT:
+    case Instruction::DIV_INT_2ADDR:
     case Instruction::DIV_INT_LIT8:
     case Instruction::DIV_INT_LIT16:
+    case Instruction::REM_INT:
+    case Instruction::REM_INT_2ADDR:
     case Instruction::REM_INT_LIT8:
     case Instruction::REM_INT_LIT16: {
       if (lit == 0) {
@@ -1738,6 +1528,8 @@
         return false;
       }
       if ((opcode == Instruction::DIV_INT_LIT8) ||
+          (opcode == Instruction::DIV_INT) ||
+          (opcode == Instruction::DIV_INT_2ADDR) ||
           (opcode == Instruction::DIV_INT_LIT16)) {
         is_div = true;
       } else {
@@ -1762,7 +1554,7 @@
       break;
     }
     default:
-      return true;
+      LOG(FATAL) << "Unexpected opcode " << opcode;
   }
   rl_src = LoadValue(cu, rl_src, kCoreReg);
   rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
diff --git a/src/compiler/codegen/gen_invoke.cc b/src/compiler/codegen/gen_invoke.cc
index ebc1a98..f354152 100644
--- a/src/compiler/codegen/gen_invoke.cc
+++ b/src/compiler/codegen/gen_invoke.cc
@@ -602,7 +602,10 @@
       next_reg++;
       next_arg++;
     } else {
-      rl_arg.wide = false;
+      if (rl_arg.wide) {
+        rl_arg.wide = false;
+        rl_arg.is_const = false;
+      }
       cg->LoadValueDirectFixed(cu, rl_arg, next_reg);
     }
     call_state = next_call_insn(cu, info, call_state, dex_idx, method_idx,
diff --git a/src/compiler/codegen/gen_loadstore.cc b/src/compiler/codegen/gen_loadstore.cc
index fe08caa..eec74af 100644
--- a/src/compiler/codegen/gen_loadstore.cc
+++ b/src/compiler/codegen/gen_loadstore.cc
@@ -92,7 +92,11 @@
   } else {
     DCHECK((rl_src.location == kLocDalvikFrame) ||
            (rl_src.location == kLocCompilerTemp));
-    LoadWordDisp(cu, TargetReg(kSp), SRegOffset(cu, rl_src.s_reg_low), r_dest);
+    if (rl_src.is_const && InexpensiveConstant(r_dest, cu->constant_values[rl_src.orig_sreg])) {
+      LoadConstantNoClobber(cu, r_dest, cu->constant_values[rl_src.orig_sreg]);
+    } else {
+      LoadWordDisp(cu, TargetReg(kSp), SRegOffset(cu, rl_src.s_reg_low), r_dest);
+    }
   }
 }
 
diff --git a/src/compiler/codegen/mips/codegen_mips.h b/src/compiler/codegen/mips/codegen_mips.h
index aaa03c0..4178f2e 100644
--- a/src/compiler/codegen/mips/codegen_mips.h
+++ b/src/compiler/codegen/mips/codegen_mips.h
@@ -39,7 +39,6 @@
     virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value);
     virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
                                        int val_lo, int val_hi);
-    virtual void LoadPair(CompilationUnit* cu, int base, int low_reg, int high_reg);
     virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
                                OpSize size);
     virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -90,6 +89,12 @@
     virtual bool IsUnconditionalBranch(LIR* lir);
 
     // Required for target - Dalvik-level generators.
+    virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
+                                RegLocation rl_index, RegLocation rl_src, int scale);
+    virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_dest, int scale);
+    virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_src, int scale);
     virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2);
     virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
@@ -184,6 +189,7 @@
     void SpillCoreRegs(CompilationUnit* cu);
     void UnSpillCoreRegs(CompilationUnit* cu);
     static const MipsEncodingMap EncodingMap[kMipsLast];
+    bool InexpensiveConstant(int reg, int value);
 };
 
 }  // namespace art
diff --git a/src/compiler/codegen/mips/int_mips.cc b/src/compiler/codegen/mips/int_mips.cc
index bb36dc1..e2a5a02 100644
--- a/src/compiler/codegen/mips/int_mips.cc
+++ b/src/compiler/codegen/mips/int_mips.cc
@@ -432,4 +432,207 @@
   return false;
 }
 
+/*
+ * Generate array load (rl_dest = rl_array[rl_index]); kLong/kDouble take the wide path.
+ */
+void MipsCodegen::GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                          RegLocation rl_index, RegLocation rl_dest, int scale)
+{
+  RegisterClass reg_class = oat_reg_class_by_size(size);
+  int len_offset = Array::LengthOffset().Int32Value();
+  int data_offset;
+  RegLocation rl_result;
+  rl_array = LoadValue(cu, rl_array, kCoreReg);
+  rl_index = LoadValue(cu, rl_index, kCoreReg);
+
+  if (size == kLong || size == kDouble) {
+    data_offset = Array::DataOffset(sizeof(int64_t)).Int32Value();
+  } else {
+    data_offset = Array::DataOffset(sizeof(int32_t)).Int32Value();
+  }
+
+  /* Null-check the array reference. */
+  GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
+
+  int reg_ptr = AllocTemp(cu);
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  int reg_len = INVALID_REG;
+  if (needs_range_check) {
+    reg_len = AllocTemp(cu);
+    /* Load the array length for the bounds check below. */
+    LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len);
+  }
+  /* reg_ptr = array base + data_offset */
+  OpRegRegImm(cu, kOpAdd, reg_ptr, rl_array.low_reg, data_offset);
+  FreeTemp(cu, rl_array.low_reg);  // The array base is not read again below.
+  if ((size == kLong) || (size == kDouble)) {  // Wide element: fold the index into reg_ptr.
+    if (scale) {
+      int r_new_index = AllocTemp(cu);  // Scratch for the shifted index.
+      OpRegRegImm(cu, kOpLsl, r_new_index, rl_index.low_reg, scale);
+      OpRegReg(cu, kOpAdd, reg_ptr, r_new_index);
+      FreeTemp(cu, r_new_index);
+    } else {
+      OpRegReg(cu, kOpAdd, reg_ptr, rl_index.low_reg);
+    }
+    FreeTemp(cu, rl_index.low_reg);  // NOTE(review): freed, yet the range check below reads it.
+    rl_result = EvalLoc(cu, rl_dest, reg_class, true);
+
+    if (needs_range_check) {
+      // TODO: change kCondCS to a more meaningful name, is the sense of
+      // carry-set/clear flipped?
+      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      FreeTemp(cu, reg_len);
+    }
+    LoadBaseDispWide(cu, reg_ptr, 0, rl_result.low_reg, rl_result.high_reg, INVALID_SREG);
+
+    FreeTemp(cu, reg_ptr);
+    StoreValueWide(cu, rl_dest, rl_result);
+  } else {  // Narrow element: the indexed load applies scale itself.
+    rl_result = EvalLoc(cu, rl_dest, reg_class, true);
+
+    if (needs_range_check) {
+      // TODO: change kCondCS to a more meaningful name, is the sense of
+      // carry-set/clear flipped?
+      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      FreeTemp(cu, reg_len);
+    }
+    LoadBaseIndexed(cu, reg_ptr, rl_index.low_reg, rl_result.low_reg, scale, size);
+
+    FreeTemp(cu, reg_ptr);
+    StoreValue(cu, rl_dest, rl_result);
+  }
+}
+
+/*
+ * Generate array store (rl_array[rl_index] = rl_src) with the element width given by size.
+ * The bounds check is emitted unless opt_flags has MIR_IGNORE_RANGE_CHECK set.
+ */
+void MipsCodegen::GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                          RegLocation rl_index, RegLocation rl_src, int scale)
+{
+  RegisterClass reg_class = oat_reg_class_by_size(size);
+  int len_offset = Array::LengthOffset().Int32Value();
+  int data_offset;
+
+  if (size == kLong || size == kDouble) {
+    data_offset = Array::DataOffset(sizeof(int64_t)).Int32Value();
+  } else {
+    data_offset = Array::DataOffset(sizeof(int32_t)).Int32Value();
+  }
+
+  rl_array = LoadValue(cu, rl_array, kCoreReg);
+  rl_index = LoadValue(cu, rl_index, kCoreReg);
+  int reg_ptr = INVALID_REG;
+  if (IsTemp(cu, rl_array.low_reg)) {  // Array already sits in a temp: take it over as reg_ptr.
+    Clobber(cu, rl_array.low_reg);
+    reg_ptr = rl_array.low_reg;
+  } else {  // Otherwise work on a copy so the array's own register is left unmodified.
+    reg_ptr = AllocTemp(cu);
+    OpRegCopy(cu, reg_ptr, rl_array.low_reg);
+  }
+
+  /* Null-check the array reference. */
+  GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
+
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  int reg_len = INVALID_REG;
+  if (needs_range_check) {
+    reg_len = AllocTemp(cu);
+    //NOTE: max live temps(4) here.
+    /* Load the length now; reg_ptr may alias rl_array.low_reg and is advanced just below. */
+    LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len);
+  }
+  /* reg_ptr = array base + data_offset */
+  OpRegImm(cu, kOpAdd, reg_ptr, data_offset);
+  /* at this point, reg_ptr points to array, 2 live temps */
+  if ((size == kLong) || (size == kDouble)) {
+    //TUNING: specific wide routine that can handle fp regs
+    if (scale) {
+      int r_new_index = AllocTemp(cu);  // Scratch for the shifted index.
+      OpRegRegImm(cu, kOpLsl, r_new_index, rl_index.low_reg, scale);
+      OpRegReg(cu, kOpAdd, reg_ptr, r_new_index);
+      FreeTemp(cu, r_new_index);
+    } else {
+      OpRegReg(cu, kOpAdd, reg_ptr, rl_index.low_reg);
+    }
+    rl_src = LoadValueWide(cu, rl_src, reg_class);
+
+    if (needs_range_check) {
+      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      FreeTemp(cu, reg_len);
+    }
+
+    StoreBaseDispWide(cu, reg_ptr, 0, rl_src.low_reg, rl_src.high_reg);
+
+    FreeTemp(cu, reg_ptr);
+  } else {  // Narrow element: the indexed store applies scale itself.
+    rl_src = LoadValue(cu, rl_src, reg_class);
+    if (needs_range_check) {
+      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      FreeTemp(cu, reg_len);
+    }
+    StoreBaseIndexed(cu, reg_ptr, rl_index.low_reg, rl_src.low_reg,
+                     scale, size);  // NOTE(review): reg_ptr is not freed on this path.
+  }
+}
+
+/*
+ * Generate array store of an object reference: calls pCanPutArrayElementFromCode when
+ * the value is non-null, then stores the value and marks the GC card.
+ */
+void MipsCodegen::GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_src, int scale)
+{
+  int len_offset = Array::LengthOffset().Int32Value();
+  int data_offset = Array::DataOffset(sizeof(Object*)).Int32Value();
+
+  FlushAllRegs(cu);  // Use explicit registers
+  LockCallTemps(cu);
+
+  int r_value = TargetReg(kArg0);  // Register holding value
+  int r_array_class = TargetReg(kArg1);  // Register holding array's Class
+  int r_array = TargetReg(kArg2);  // Register holding array
+  int r_index = TargetReg(kArg3);  // Register holding index into array
+
+  LoadValueDirectFixed(cu, rl_array, r_array);  // Grab array
+  LoadValueDirectFixed(cu, rl_src, r_value);  // Grab value
+  LoadValueDirectFixed(cu, rl_index, r_index);  // Grab index
+
+  GenNullCheck(cu, rl_array.s_reg_low, r_array, opt_flags);  // NPE?
+
+  // A null value branches around the helper call; the target is bound further down.
+  LIR* null_value_check = OpCmpImmBranch(cu, kCondEq, r_value, 0, NULL);
+
+  // Get the array's class.
+  LoadWordDisp(cu, r_array, Object::ClassOffset().Int32Value(), r_array_class);
+  CallRuntimeHelperRegReg(cu, ENTRYPOINT_OFFSET(pCanPutArrayElementFromCode), r_value,
+                          r_array_class, true);
+  // Redo LoadValues in case they didn't survive the call.
+  LoadValueDirectFixed(cu, rl_array, r_array);  // Reload array
+  LoadValueDirectFixed(cu, rl_index, r_index);  // Reload index
+  LoadValueDirectFixed(cu, rl_src, r_value);  // Reload value
+  r_array_class = INVALID_REG;  // kArg1 may be reused for the array length below.
+
+  // Branch here if value to be stored == null
+  LIR* target = NewLIR0(cu, kPseudoTargetLabel);
+  null_value_check->target = target;
+
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  int reg_len = INVALID_REG;
+  if (needs_range_check) {
+    reg_len = TargetReg(kArg1);
+    LoadWordDisp(cu, r_array, len_offset, reg_len);  // Get len
+  }
+  /* r_ptr = array base + data_offset */
+  int r_ptr = AllocTemp(cu);
+  OpRegRegImm(cu, kOpAdd, r_ptr, r_array, data_offset);
+  if (needs_range_check) {
+    GenRegRegCheck(cu, kCondCs, r_index, reg_len, kThrowArrayBounds);
+  }
+  StoreBaseIndexed(cu, r_ptr, r_index, r_value, scale, kWord);
+  FreeTemp(cu, r_ptr);
+  FreeTemp(cu, r_index);
+  MarkGCCard(cu, r_value, r_array);
+}
+
 }  // namespace art
diff --git a/src/compiler/codegen/mips/utility_mips.cc b/src/compiler/codegen/mips/utility_mips.cc
index 44d75d1..4d4be76 100644
--- a/src/compiler/codegen/mips/utility_mips.cc
+++ b/src/compiler/codegen/mips/utility_mips.cc
@@ -52,6 +52,19 @@
   return res;
 }
 
+// True when value is zero, passes IsUint(16, value), or lies in [-32768, -1].
+// The reg argument is not consulted.
+bool MipsCodegen::InexpensiveConstant(int reg, int value)
+{
+  if (value == 0) {
+    return true;
+  }
+  if (IsUint(16, value)) {
+    return true;
+  }
+  return (value < 0) && (value >= -32768);
+}
+
 /*
  * Load a immediate using a shortcut if possible; otherwise
  * grab from the per-translation literal pool.  If target is
@@ -640,12 +653,6 @@
   return StoreBaseDispBody(cu, rBase, displacement, r_src_lo, r_src_hi, kLong);
 }
 
-void MipsCodegen::LoadPair(CompilationUnit *cu, int base, int low_reg, int high_reg)
-{
-  LoadWordDisp(cu, base, LOWORD_OFFSET , low_reg);
-  LoadWordDisp(cu, base, HIWORD_OFFSET , high_reg);
-}
-
 LIR* MipsCodegen::OpThreadMem(CompilationUnit* cu, OpKind op, int thread_offset)
 {
   LOG(FATAL) << "Unexpected use of OpThreadMem for MIPS";
diff --git a/src/compiler/codegen/mir_to_lir.cc b/src/compiler/codegen/mir_to_lir.cc
index 6ec7edb..acdeafe 100644
--- a/src/compiler/codegen/mir_to_lir.cc
+++ b/src/compiler/codegen/mir_to_lir.cc
@@ -278,11 +278,22 @@
       LIR* fall_through = &label_list[bb->fall_through->id];
       bool backward_branch;
       backward_branch = (bb->taken->start_offset <= mir->offset);
-      if (backward_branch) {
-        cg->GenSuspendTest(cu, opt_flags);
+      // Result known at compile time?
+      if (rl_src[0].is_const && rl_src[1].is_const) {
+        bool is_taken = EvaluateBranch(opcode, cu->constant_values[rl_src[0].orig_sreg],
+                                       cu->constant_values[rl_src[1].orig_sreg]);
+        if (is_taken && backward_branch) {
+          cg->GenSuspendTest(cu, opt_flags);
+        }
+        int id = is_taken ? bb->taken->id : bb->fall_through->id;
+        cg->OpUnconditionalBranch(cu, &label_list[id]);
+      } else {
+        if (backward_branch) {
+          cg->GenSuspendTest(cu, opt_flags);
+        }
+        cg->GenCompareAndBranch(cu, opcode, rl_src[0], rl_src[1], taken,
+                                fall_through);
       }
-      cg->GenCompareAndBranch(cu, opcode, rl_src[0], rl_src[1], taken,
-                          fall_through);
       break;
       }
 
@@ -296,10 +307,20 @@
       LIR* fall_through = &label_list[bb->fall_through->id];
       bool backward_branch;
       backward_branch = (bb->taken->start_offset <= mir->offset);
-      if (backward_branch) {
-        cg->GenSuspendTest(cu, opt_flags);
+      // Result known at compile time?
+      if (rl_src[0].is_const) {
+        bool is_taken = EvaluateBranch(opcode, cu->constant_values[rl_src[0].orig_sreg], 0);
+        if (is_taken && backward_branch) {
+          cg->GenSuspendTest(cu, opt_flags);
+        }
+        int id = is_taken ? bb->taken->id : bb->fall_through->id;
+        cg->OpUnconditionalBranch(cu, &label_list[id]);
+      } else {
+        if (backward_branch) {
+          cg->GenSuspendTest(cu, opt_flags);
+        }
+        cg->GenCompareZeroAndBranch(cu, opcode, rl_src[0], taken, fall_through);
       }
-      cg->GenCompareZeroAndBranch(cu, opcode, rl_src[0], taken, fall_through);
       break;
       }
 
@@ -504,29 +525,49 @@
       cg->GenConversion(cu, opcode, rl_dest, rl_src[0]);
       break;
 
+
     case Instruction::ADD_INT:
-    case Instruction::SUB_INT:
-    case Instruction::MUL_INT:
-    case Instruction::DIV_INT:
-    case Instruction::REM_INT:
-    case Instruction::AND_INT:
-    case Instruction::OR_INT:
-    case Instruction::XOR_INT:
-    case Instruction::SHL_INT:
-    case Instruction::SHR_INT:
-    case Instruction::USHR_INT:
     case Instruction::ADD_INT_2ADDR:
-    case Instruction::SUB_INT_2ADDR:
+    case Instruction::MUL_INT:
     case Instruction::MUL_INT_2ADDR:
-    case Instruction::DIV_INT_2ADDR:
-    case Instruction::REM_INT_2ADDR:
+    case Instruction::AND_INT:
     case Instruction::AND_INT_2ADDR:
+    case Instruction::OR_INT:
     case Instruction::OR_INT_2ADDR:
+    case Instruction::XOR_INT:
     case Instruction::XOR_INT_2ADDR:
+      if (rl_src[0].is_const &&
+          cu->cg->InexpensiveConstant(0, cu->constant_values[rl_src[0].orig_sreg])) {
+        cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[1],
+                             cu->constant_values[rl_src[0].orig_sreg]);
+      } else if (rl_src[1].is_const &&
+          cu->cg->InexpensiveConstant(0, cu->constant_values[rl_src[1].orig_sreg])) {
+        cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[0],
+                             cu->constant_values[rl_src[1].orig_sreg]);
+      } else {
+        cg->GenArithOpInt(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
+      }
+      break;
+
+    case Instruction::SUB_INT:
+    case Instruction::SUB_INT_2ADDR:
+    case Instruction::DIV_INT:
+    case Instruction::DIV_INT_2ADDR:
+    case Instruction::REM_INT:
+    case Instruction::REM_INT_2ADDR:
+    case Instruction::SHL_INT:
     case Instruction::SHL_INT_2ADDR:
+    case Instruction::SHR_INT:
     case Instruction::SHR_INT_2ADDR:
+    case Instruction::USHR_INT:
     case Instruction::USHR_INT_2ADDR:
-      cg->GenArithOpInt(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
+      if (rl_src[1].is_const &&
+          cu->cg->InexpensiveConstant(0, cu->constant_values[rl_src[1].orig_sreg])) {
+        cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[0],
+                             cu->constant_values[rl_src[1].orig_sreg]);
+      } else {
+        cg->GenArithOpInt(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
+      }
       break;
 
     case Instruction::ADD_LONG:
diff --git a/src/compiler/codegen/ralloc_util.cc b/src/compiler/codegen/ralloc_util.cc
index 1a3a413..accf676 100644
--- a/src/compiler/codegen/ralloc_util.cc
+++ b/src/compiler/codegen/ralloc_util.cc
@@ -1091,12 +1091,18 @@
     RegLocation loc = cu->reg_location[i];
     RefCounts* counts = loc.fp ? fp_counts : core_counts;
     int p_map_idx = SRegToPMap(cu, loc.s_reg_low);
+    int sample_reg = loc.fp ? cu->reg_pool->FPRegs[0].reg : cu->reg_pool->core_regs[0].reg;
+    bool simple_immediate = loc.is_const &&  // True only for cheaply regenerated constants.
+        cu->cg->InexpensiveConstant(sample_reg, cu->constant_values[loc.orig_sreg]);
     if (loc.defined) {
-      counts[p_map_idx].count += cu->use_counts.elem_list[i];
+      // Don't count easily regenerated immediates
+      if (!simple_immediate) {
+        counts[p_map_idx].count += cu->use_counts.elem_list[i];
+      }
     }
     if (loc.wide) {
       if (loc.defined) {
-        if (loc.fp) {
+        if (loc.fp && !simple_immediate) {
           counts[p_map_idx].double_start = true;
           counts[p_map_idx+1].count += cu->use_counts.elem_list[i+1];
         }
diff --git a/src/compiler/codegen/x86/codegen_x86.h b/src/compiler/codegen/x86/codegen_x86.h
index 4ef186a..f467e83 100644
--- a/src/compiler/codegen/x86/codegen_x86.h
+++ b/src/compiler/codegen/x86/codegen_x86.h
@@ -40,7 +40,6 @@
     virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value);
     virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
                                        int val_lo, int val_hi);
-    virtual void LoadPair(CompilationUnit* cu, int base, int low_reg, int high_reg);
     virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
                                OpSize size);
     virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -91,6 +90,12 @@
     virtual bool IsUnconditionalBranch(LIR* lir);
 
     // Required for target - Dalvik-level generators.
+    virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
+                                RegLocation rl_index, RegLocation rl_src, int scale);
+    virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_dest, int scale);
+    virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_src, int scale);
     virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2);
     virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
@@ -182,6 +187,7 @@
     void SpillCoreRegs(CompilationUnit* cu);
     void UnSpillCoreRegs(CompilationUnit* cu);
     static const X86EncodingMap EncodingMap[kX86Last];
+    bool InexpensiveConstant(int reg, int value);
 };
 
 }  // namespace art
diff --git a/src/compiler/codegen/x86/int_x86.cc b/src/compiler/codegen/x86/int_x86.cc
index bd3a7fa..0f1fc53 100644
--- a/src/compiler/codegen/x86/int_x86.cc
+++ b/src/compiler/codegen/x86/int_x86.cc
@@ -439,4 +439,148 @@
   NewLIR2(cu, opcode, r_dest, thread_offset);
 }
 
+/*
+ * Generate array load (rl_dest = rl_array[rl_index]); kLong/kDouble go through OpLea first.
+ */
+void X86Codegen::GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                          RegLocation rl_index, RegLocation rl_dest, int scale)
+{
+  RegisterClass reg_class = oat_reg_class_by_size(size);
+  int len_offset = Array::LengthOffset().Int32Value();
+  int data_offset;
+  RegLocation rl_result;
+  rl_array = LoadValue(cu, rl_array, kCoreReg);
+  rl_index = LoadValue(cu, rl_index, kCoreReg);
+
+  if (size == kLong || size == kDouble) {
+    data_offset = Array::DataOffset(sizeof(int64_t)).Int32Value();
+  } else {
+    data_offset = Array::DataOffset(sizeof(int32_t)).Int32Value();
+  }
+
+  /* Null-check the array reference. */
+  GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
+
+  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
+    /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */
+    GenRegMemCheck(cu, kCondUge, rl_index.low_reg, rl_array.low_reg,
+                   len_offset, kThrowArrayBounds);
+  }
+  if ((size == kLong) || (size == kDouble)) {
+    int reg_addr = AllocTemp(cu);  // Receives the element address formed by OpLea.
+    OpLea(cu, reg_addr, rl_array.low_reg, rl_index.low_reg, scale, data_offset);
+    FreeTemp(cu, rl_array.low_reg);  // Neither source register is read again on this path.
+    FreeTemp(cu, rl_index.low_reg);
+    rl_result = EvalLoc(cu, rl_dest, reg_class, true);
+    LoadBaseIndexedDisp(cu, reg_addr, INVALID_REG, 0, 0, rl_result.low_reg,
+                        rl_result.high_reg, size, INVALID_SREG);
+    StoreValueWide(cu, rl_dest, rl_result);
+  } else {  // Narrow element: a single load with base, index, scale and displacement.
+    rl_result = EvalLoc(cu, rl_dest, reg_class, true);
+
+    LoadBaseIndexedDisp(cu, rl_array.low_reg, rl_index.low_reg, scale,
+                        data_offset, rl_result.low_reg, INVALID_REG, size,
+                        INVALID_SREG);
+
+    StoreValue(cu, rl_dest, rl_result);
+  }
+}
+
+/*
+ * Generate array store (rl_array[rl_index] = rl_src) with the element width given by size.
+ * The bounds check is emitted unless opt_flags has MIR_IGNORE_RANGE_CHECK set.
+ */
+void X86Codegen::GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                          RegLocation rl_index, RegLocation rl_src, int scale)
+{
+  RegisterClass reg_class = oat_reg_class_by_size(size);
+  int len_offset = Array::LengthOffset().Int32Value();
+  int data_offset;
+
+  if (size == kLong || size == kDouble) {
+    data_offset = Array::DataOffset(sizeof(int64_t)).Int32Value();
+  } else {
+    data_offset = Array::DataOffset(sizeof(int32_t)).Int32Value();
+  }
+
+  rl_array = LoadValue(cu, rl_array, kCoreReg);
+  rl_index = LoadValue(cu, rl_index, kCoreReg);
+
+  /* Null-check the array reference. */
+  GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
+
+  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
+    /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */
+    GenRegMemCheck(cu, kCondUge, rl_index.low_reg, rl_array.low_reg, len_offset, kThrowArrayBounds);
+  }
+  if ((size == kLong) || (size == kDouble)) {
+    rl_src = LoadValueWide(cu, rl_src, reg_class);
+  } else {
+    rl_src = LoadValue(cu, rl_src, reg_class);
+  }
+  // If the src reg can't be byte accessed, move it to a temp first.
+  if ((size == kSignedByte || size == kUnsignedByte) && rl_src.low_reg >= 4) {
+    int temp = AllocTemp(cu);  // NOTE(review): nothing here forces temp below 4 - confirm.
+    OpRegCopy(cu, temp, rl_src.low_reg);
+    StoreBaseIndexedDisp(cu, rl_array.low_reg, rl_index.low_reg, scale, data_offset, temp,
+                         INVALID_REG, size, INVALID_SREG);
+  } else {
+    StoreBaseIndexedDisp(cu, rl_array.low_reg, rl_index.low_reg, scale, data_offset, rl_src.low_reg,
+                         rl_src.high_reg, size, INVALID_SREG);
+  }
+}
+
+/*
+ * Generate array store of an object reference: calls pCanPutArrayElementFromCode when
+ * the value is non-null, then stores the value and marks the GC card.
+ */
+void X86Codegen::GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_src, int scale)
+{
+  int len_offset = Array::LengthOffset().Int32Value();
+  int data_offset = Array::DataOffset(sizeof(Object*)).Int32Value();
+
+  FlushAllRegs(cu);  // Use explicit registers
+  LockCallTemps(cu);
+
+  int r_value = TargetReg(kArg0);  // Register holding value
+  int r_array_class = TargetReg(kArg1);  // Register holding array's Class
+  int r_array = TargetReg(kArg2);  // Register holding array
+  int r_index = TargetReg(kArg3);  // Register holding index into array
+
+  LoadValueDirectFixed(cu, rl_array, r_array);  // Grab array
+  LoadValueDirectFixed(cu, rl_src, r_value);  // Grab value
+  LoadValueDirectFixed(cu, rl_index, r_index);  // Grab index
+
+  GenNullCheck(cu, rl_array.s_reg_low, r_array, opt_flags);  // NPE?
+
+  // A null value branches around the helper call; the target is bound further down.
+  LIR* null_value_check = OpCmpImmBranch(cu, kCondEq, r_value, 0, NULL);
+
+  // Get the array's class.
+  LoadWordDisp(cu, r_array, Object::ClassOffset().Int32Value(), r_array_class);
+  CallRuntimeHelperRegReg(cu, ENTRYPOINT_OFFSET(pCanPutArrayElementFromCode), r_value,
+                          r_array_class, true);
+  // Redo LoadValues in case they didn't survive the call.
+  LoadValueDirectFixed(cu, rl_array, r_array);  // Reload array
+  LoadValueDirectFixed(cu, rl_index, r_index);  // Reload index
+  LoadValueDirectFixed(cu, rl_src, r_value);  // Reload value
+  r_array_class = INVALID_REG;  // The class register is not read again below.
+
+  // Branch here if value to be stored == null
+  LIR* target = NewLIR0(cu, kPseudoTargetLabel);
+  null_value_check->target = target;
+
+  // make an extra temp available for card mark below
+  FreeTemp(cu, TargetReg(kArg1));
+  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
+    /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */
+    GenRegMemCheck(cu, kCondUge, r_index, r_array, len_offset, kThrowArrayBounds);
+  }
+  StoreBaseIndexedDisp(cu, r_array, r_index, scale,
+                       data_offset, r_value, INVALID_REG, kWord, INVALID_SREG);
+  FreeTemp(cu, r_index);
+  MarkGCCard(cu, r_value, r_array);
+}
+
 }  // namespace art
diff --git a/src/compiler/codegen/x86/utility_x86.cc b/src/compiler/codegen/x86/utility_x86.cc
index bdbc547..ce55b4b 100644
--- a/src/compiler/codegen/x86/utility_x86.cc
+++ b/src/compiler/codegen/x86/utility_x86.cc
@@ -50,6 +50,11 @@
   return res;
 }
 
+bool X86Codegen::InexpensiveConstant(int reg, int value)
+{
+  return true;  // Every constant is reported as inexpensive; reg and value are unused.
+}
+
 /*
  * Load a immediate using a shortcut if possible; otherwise
  * grab from the per-translation literal pool.  If target is
@@ -559,9 +564,4 @@
                               r_src_lo, r_src_hi, kLong, INVALID_SREG);
 }
 
-void X86Codegen::LoadPair(CompilationUnit *cu, int base, int low_reg, int high_reg)
-{
-  LoadBaseDispWide(cu, base, 0, low_reg, high_reg, INVALID_SREG);
-}
-
 }  // namespace art