Add a prepare for register allocation pass.

- Currently the pass just changes the uses of checks to the
  actual values.
- Also optimize array access, now that inputs can be constants.
- And fix another bug in the register allocator revealed by
  this change.

Change-Id: I43be0dbde9330ee5c8f9d678de11361292d8bd98
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 133044a..edc5bd0 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -100,6 +100,7 @@
 	optimizing/nodes.cc \
 	optimizing/optimizing_compiler.cc \
 	optimizing/parallel_move_resolver.cc \
+	optimizing/prepare_for_register_allocation.cc \
 	optimizing/register_allocator.cc \
 	optimizing/ssa_builder.cc \
 	optimizing/ssa_liveness_analysis.cc \
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 2aa04d4..9d875df 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1311,8 +1311,9 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  // TODO: Have a normalization phase that makes this instruction never used.
-  locations->SetOut(Location::SameAsFirstInput());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
 }
 
 void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) {
@@ -1321,12 +1322,15 @@
 
   LocationSummary* locations = instruction->GetLocations();
   Location obj = locations->InAt(0);
-  DCHECK(obj.Equals(locations->Out()));
 
   if (obj.IsRegister()) {
     __ cmp(obj.AsArm().AsCoreRegister(), ShifterOperand(0));
+    __ b(slow_path->GetEntryLabel(), EQ);
+  } else {
+    DCHECK(obj.IsConstant()) << obj;
+    DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0);
+    __ b(slow_path->GetEntryLabel());
   }
-  __ b(slow_path->GetEntryLabel(), EQ);
 }
 
 void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
@@ -1550,8 +1554,9 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // TODO: Have a normalization phase that makes this instruction never used.
-  locations->SetOut(Location::SameAsFirstInput());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
 }
 
 void InstructionCodeGeneratorARM::VisitBoundsCheck(HBoundsCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6791003..658aed9 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1302,8 +1302,9 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::Any());
-  // TODO: Have a normalization phase that makes this instruction never used.
-  locations->SetOut(Location::SameAsFirstInput());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
@@ -1312,13 +1313,16 @@
 
   LocationSummary* locations = instruction->GetLocations();
   Location obj = locations->InAt(0);
-  DCHECK(obj.Equals(locations->Out()));
 
   if (obj.IsRegister()) {
     __ cmpl(obj.AsX86().AsCpuRegister(), Immediate(0));
-  } else {
-    DCHECK(locations->InAt(0).IsStackSlot());
+  } else if (obj.IsStackSlot()) {
     __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
+  } else {
+    DCHECK(obj.IsConstant()) << obj;
+    DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0);
+    __ jmp(slow_path->GetEntryLabel());
+    return;
   }
   __ j(kEqual, slow_path->GetEntryLabel());
 }
@@ -1443,9 +1447,10 @@
         1, Location::RegisterOrConstant(instruction->InputAt(1)), dies_at_entry);
     if (value_type == Primitive::kPrimBoolean || value_type == Primitive::kPrimByte) {
       // Ensure the value is in a byte register.
-      locations->SetInAt(2, X86CpuLocation(EAX), dies_at_entry);
+      locations->SetInAt(2, Location::ByteRegisterOrConstant(
+          X86ManagedRegister::FromCpuRegister(EAX), instruction->InputAt(2)), dies_at_entry);
     } else {
-      locations->SetInAt(2, Location::RequiresRegister(), dies_at_entry);
+      locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)), dies_at_entry);
     }
   }
 }
@@ -1454,18 +1459,29 @@
   LocationSummary* locations = instruction->GetLocations();
   Register obj = locations->InAt(0).AsX86().AsCpuRegister();
   Location index = locations->InAt(1);
+  Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      ByteRegister value = locations->InAt(2).AsX86().AsByteRegister();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ movb(Address(obj, offset), value);
+        if (value.IsRegister()) {
+          __ movb(Address(obj, offset), value.AsX86().AsByteRegister());
+        } else {
+          __ movb(Address(obj, offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       } else {
-        __ movb(Address(obj, index.AsX86().AsCpuRegister(), TIMES_1, data_offset), value);
+        if (value.IsRegister()) {
+          __ movb(Address(obj, index.AsX86().AsCpuRegister(), TIMES_1, data_offset),
+                  value.AsX86().AsByteRegister());
+        } else {
+          __ movb(Address(obj, index.AsX86().AsCpuRegister(), TIMES_1, data_offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       }
       break;
     }
@@ -1473,24 +1489,43 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register value = locations->InAt(2).AsX86().AsCpuRegister();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ movw(Address(obj, offset), value);
+        if (value.IsRegister()) {
+          __ movw(Address(obj, offset), value.AsX86().AsCpuRegister());
+        } else {
+          __ movw(Address(obj, offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       } else {
-        __ movw(Address(obj, index.AsX86().AsCpuRegister(), TIMES_2, data_offset), value);
+        if (value.IsRegister()) {
+          __ movw(Address(obj, index.AsX86().AsCpuRegister(), TIMES_2, data_offset),
+                  value.AsX86().AsCpuRegister());
+        } else {
+          __ movw(Address(obj, index.AsX86().AsCpuRegister(), TIMES_2, data_offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       }
       break;
     }
 
     case Primitive::kPrimInt: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register value = locations->InAt(2).AsX86().AsCpuRegister();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ movl(Address(obj, offset), value);
+        if (value.IsRegister()) {
+          __ movl(Address(obj, offset), value.AsX86().AsCpuRegister());
+        } else {
+          __ movl(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       } else {
-        __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_4, data_offset), value);
+        if (value.IsRegister()) {
+          __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_4, data_offset),
+                  value.AsX86().AsCpuRegister());
+        } else {
+          __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_4, data_offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       }
       break;
     }
@@ -1504,16 +1539,29 @@
 
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      X86ManagedRegister value = locations->InAt(2).AsX86();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ movl(Address(obj, offset), value.AsRegisterPairLow());
-        __ movl(Address(obj, offset + kX86WordSize), value.AsRegisterPairHigh());
+        if (value.IsRegister()) {
+          __ movl(Address(obj, offset), value.AsX86().AsRegisterPairLow());
+          __ movl(Address(obj, offset + kX86WordSize), value.AsX86().AsRegisterPairHigh());
+        } else {
+          int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
+          __ movl(Address(obj, offset), Immediate(Low32Bits(val)));
+          __ movl(Address(obj, offset + kX86WordSize), Immediate(High32Bits(val)));
+        }
       } else {
-        __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset),
-                value.AsRegisterPairLow());
-        __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset + kX86WordSize),
-                value.AsRegisterPairHigh());
+        if (value.IsRegister()) {
+          __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset),
+                  value.AsX86().AsRegisterPairLow());
+          __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset + kX86WordSize),
+                  value.AsX86().AsRegisterPairHigh());
+        } else {
+          int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
+          __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset),
+                  Immediate(Low32Bits(val)));
+          __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset + kX86WordSize),
+                  Immediate(High32Bits(val)));
+        }
       }
       break;
     }
@@ -1547,8 +1595,9 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // TODO: Have a normalization phase that makes this instruction never used.
-  locations->SetOut(Location::SameAsFirstInput());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index e0e0c79..a4efe4f 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1149,8 +1149,9 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::Any());
-  // TODO: Have a normalization phase that makes this instruction never used.
-  locations->SetOut(Location::SameAsFirstInput());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
@@ -1159,13 +1160,16 @@
 
   LocationSummary* locations = instruction->GetLocations();
   Location obj = locations->InAt(0);
-  DCHECK(obj.Equals(locations->Out()));
 
   if (obj.IsRegister()) {
     __ cmpl(obj.AsX86_64().AsCpuRegister(), Immediate(0));
-  } else {
-    DCHECK(locations->InAt(0).IsStackSlot());
+  } else if (obj.IsStackSlot()) {
     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
+  } else {
+    DCHECK(obj.IsConstant()) << obj;
+    DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0);
+    __ jmp(slow_path->GetEntryLabel());
+    return;
   }
   __ j(kEqual, slow_path->GetEntryLabel());
 }
@@ -1283,6 +1287,11 @@
     locations->SetInAt(
         1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry);
     locations->SetInAt(2, Location::RequiresRegister(), Location::kDiesAtEntry);
+    if (value_type == Primitive::kPrimLong) {
+      locations->SetInAt(2, Location::RequiresRegister(), Location::kDiesAtEntry);
+    } else {
+      locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)), Location::kDiesAtEntry);
+    }
   }
 }
 
@@ -1290,18 +1299,28 @@
   LocationSummary* locations = instruction->GetLocations();
   CpuRegister obj = locations->InAt(0).AsX86_64().AsCpuRegister();
   Location index = locations->InAt(1);
+  Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      CpuRegister value = locations->InAt(2).AsX86_64().AsCpuRegister();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ movb(Address(obj, offset), value);
+        if (value.IsRegister()) {
+          __ movb(Address(obj, offset), value.AsX86_64().AsCpuRegister());
+        } else {
+          __ movb(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       } else {
-        __ movb(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_1, data_offset), value);
+        if (value.IsRegister()) {
+          __ movb(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_1, data_offset),
+                  value.AsX86_64().AsCpuRegister());
+        } else {
+          __ movb(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_1, data_offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       }
       break;
     }
@@ -1309,24 +1328,42 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      CpuRegister value = locations->InAt(2).AsX86_64().AsCpuRegister();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ movw(Address(obj, offset), value);
+        if (value.IsRegister()) {
+          __ movw(Address(obj, offset), value.AsX86_64().AsCpuRegister());
+        } else {
+          __ movw(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       } else {
-        __ movw(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_2, data_offset), value);
+        if (value.IsRegister()) {
+          __ movw(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_2, data_offset),
+                  value.AsX86_64().AsCpuRegister());
+        } else {
+          __ movw(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_2, data_offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       }
       break;
     }
 
     case Primitive::kPrimInt: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      CpuRegister value = locations->InAt(2).AsX86_64().AsCpuRegister();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ movl(Address(obj, offset), value);
+        if (value.IsRegister()) {
+          __ movl(Address(obj, offset), value.AsX86_64().AsCpuRegister());
+        } else {
+          __ movl(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       } else {
-        __ movl(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_4, data_offset), value);
+        if (value.IsRegister()) {
+          __ movl(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_4, data_offset),
+                  value.AsX86_64().AsCpuRegister());
+        } else {
+          __ movl(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_4, data_offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       }
       break;
     }
@@ -1340,12 +1377,14 @@
 
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      CpuRegister value = locations->InAt(2).AsX86_64().AsCpuRegister();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ movq(Address(obj, offset), value);
+        DCHECK(value.IsRegister());
+        __ movq(Address(obj, offset), value.AsX86_64().AsCpuRegister());
       } else {
-        __ movq(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_8, data_offset), value);
+        DCHECK(value.IsRegister());
+        __ movq(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_8, data_offset),
+                value.AsX86_64().AsCpuRegister());
       }
       break;
     }
@@ -1379,8 +1418,9 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // TODO: Have a normalization phase that makes this instruction never used.
-  locations->SetOut(Location::SameAsFirstInput());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 0fb4737..686a0b0 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -158,6 +158,7 @@
       }
     }
     output_ << ")";
+    output_ << " (liveness: " << instruction->GetLifetimePosition() << ")";
   }
 
   void VisitInstruction(HInstruction* instruction) {
@@ -191,6 +192,7 @@
           DumpLocation(locations->Out(), instruction->GetType());
         }
       }
+      output_ << " (liveness: " << instruction->GetLifetimePosition() << ")";
     }
   }
 
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 114d69c..7b09241 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -55,4 +55,15 @@
       : Location::RequiresRegister();
 }
 
+Location Location::ByteRegisterOrConstant(ManagedRegister reg, HInstruction* instruction) {
+  return instruction->IsConstant()
+      ? Location::ConstantLocation(instruction->AsConstant())
+      : Location::RegisterLocation(reg);
+}
+
+std::ostream& operator<<(std::ostream& os, const Location& location) {
+  os << location.DebugString();
+  return os;
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 8d0715a..0e77deb 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -221,6 +221,7 @@
   }
 
   static Location RegisterOrConstant(HInstruction* instruction);
+  static Location ByteRegisterOrConstant(ManagedRegister reg, HInstruction* instruction);
 
   // The location of the first input to the instruction will be
   // used to replace this unallocated location.
@@ -422,6 +423,8 @@
   DISALLOW_COPY_AND_ASSIGN(LocationSummary);
 };
 
+std::ostream& operator<<(std::ostream& os, const Location& location);
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_LOCATIONS_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 65bdb18..3cf5a0b 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -28,6 +28,7 @@
 #include "gvn.h"
 #include "instruction_simplifier.h"
 #include "nodes.h"
+#include "prepare_for_register_allocation.h"
 #include "register_allocator.h"
 #include "ssa_phi_elimination.h"
 #include "ssa_liveness_analysis.h"
@@ -265,6 +266,7 @@
     InstructionSimplifier(graph).Run();
     GlobalValueNumberer(graph->GetArena(), graph).Run();
     visualizer.DumpGraph(kGVNPassName);
+    PrepareForRegisterAllocation(graph).Run();
 
     SsaLivenessAnalysis liveness(*graph, codegen);
     liveness.Analyze();
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
new file mode 100644
index 0000000..bfbbab5
--- /dev/null
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "prepare_for_register_allocation.h"
+
+namespace art {
+
+void PrepareForRegisterAllocation::Run() {
+  // Order does not matter.
+  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    // No need to visit the phis.
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      it.Current()->Accept(this);
+    }
+  }
+}
+
+void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) {
+  check->ReplaceWith(check->InputAt(0));
+}
+
+void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
+  check->ReplaceWith(check->InputAt(0));
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
new file mode 100644
index 0000000..37f2871
--- /dev/null
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_PREPARE_FOR_REGISTER_ALLOCATION_H_
+#define ART_COMPILER_OPTIMIZING_PREPARE_FOR_REGISTER_ALLOCATION_H_
+
+#include "nodes.h"
+
+namespace art {
+
+/**
+ * A simplification pass over the graph before doing register allocation.
+ * For example it changes uses of null checks and bounds checks to the original
+ * objects, to avoid creating a live range for these checks.
+ */
+class PrepareForRegisterAllocation : public HGraphVisitor {
+ public:
+  explicit PrepareForRegisterAllocation(HGraph* graph) : HGraphVisitor(graph) {}
+
+  void Run();
+
+ private:
+  virtual void VisitNullCheck(HNullCheck* check) OVERRIDE;
+  virtual void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
+
+  DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_PREPARE_FOR_REGISTER_ALLOCATION_H_
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 1d1d694..013ab72 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -795,7 +795,7 @@
     move = at->GetNext()->AsParallelMove();
     // This is a parallel move for connecting siblings in a same block. We need to
     // differentiate it with moves for connecting blocks, and input moves.
-    if (move == nullptr || move->GetLifetimePosition() != position) {
+    if (move == nullptr || IsInputMove(move) || move->GetLifetimePosition() > position) {
       move = new (allocator_) HParallelMove(allocator_);
       move->SetLifetimePosition(position);
       at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
@@ -803,16 +803,11 @@
   } else {
     // Move must happen before the instruction.
     HInstruction* previous = at->GetPrevious();
-    if (previous != nullptr && previous->IsParallelMove()) {
+    if (previous != nullptr && previous->IsParallelMove() && IsInputMove(previous)) {
       // This is a parallel move for connecting siblings in a same block. We need to
-      // differentiate it with moves for connecting blocks, and input moves.
-      if (previous->GetLifetimePosition() != position) {
-        // If the previous instruction of the previous instruction is not a parallel
-        // move, we have to insert the new parallel move before the input or connecting
-        // block moves.
-        at = previous;
-        previous = previous->GetPrevious();
-      }
+      // differentiate it with input moves.
+      at = previous;
+      previous = previous->GetPrevious();
     }
     if (previous == nullptr
         || !previous->IsParallelMove()
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index f888d46..a2cbd8b 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -243,6 +243,17 @@
 }
 
 
+void X86Assembler::movw(const Address& dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOperandSizeOverride();
+  EmitUint8(0xC7);
+  EmitOperand(0, dst);
+  CHECK(imm.is_int16());
+  EmitUint8(imm.value() & 0xFF);
+  EmitUint8(imm.value() >> 8);
+}
+
+
 void X86Assembler::leal(Register dst, const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x8D);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index ec983d9..2113c85 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -37,6 +37,7 @@
 
   bool is_int8() const { return IsInt(8, value_); }
   bool is_uint8() const { return IsUint(8, value_); }
+  bool is_int16() const { return IsInt(16, value_); }
   bool is_uint16() const { return IsUint(16, value_); }
 
  private:
@@ -251,6 +252,7 @@
   void movsxw(Register dst, const Address& src);
   void movw(Register dst, const Address& src);
   void movw(const Address& dst, Register src);
+  void movw(const Address& dst, const Immediate& imm);
 
   void leal(Register dst, const Address& src);
 
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index f432e66..ade7a13 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -234,6 +234,7 @@
 
 void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst);
   EmitUint8(0xC6);
   EmitOperand(Register::RAX, dst);
   CHECK(imm.is_int8());
@@ -291,6 +292,18 @@
 }
 
 
+void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOperandSizeOverride();
+  EmitOptionalRex32(dst);
+  EmitUint8(0xC7);
+  EmitOperand(Register::RAX, dst);
+  CHECK(imm.is_int16());
+  EmitUint8(imm.value() & 0xFF);
+  EmitUint8(imm.value() >> 8);
+}
+
+
 void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(dst, src);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 1fd65c2..c3e6e3b 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -44,6 +44,7 @@
 
   bool is_int8() const { return IsInt(8, value_); }
   bool is_uint8() const { return IsUint(8, value_); }
+  bool is_int16() const { return IsInt(16, value_); }
   bool is_uint16() const { return IsUint(16, value_); }
   bool is_int32() const {
     // This does not work on 32b machines: return IsInt(32, value_);
@@ -295,6 +296,7 @@
   void movsxw(CpuRegister dst, const Address& src);
   void movw(CpuRegister dst, const Address& src);
   void movw(const Address& dst, CpuRegister src);
+  void movw(const Address& dst, const Immediate& imm);
 
   void leaq(CpuRegister dst, const Address& src);