Merge "Major refactoring of ElfBuilder."
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index b5c42f1..9e3fbbc 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -291,8 +291,12 @@
 BasicBlock* MIRGraph::FindBlock(DexOffset code_offset, bool create,
                                 BasicBlock** immed_pred_block_p,
                                 ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
-  if (code_offset >= current_code_item_->insns_size_in_code_units_) {
-    return nullptr;
+  if (UNLIKELY(code_offset >= current_code_item_->insns_size_in_code_units_)) {
+    // There can be a fall-through out of the method code. We shall record such a block
+    // here (assuming create==true) and check that it's dead at the end of InlineMethod().
+    // Though we're only aware of the cases where code_offset is exactly the same as
+    // insns_size_in_code_units_, treat greater code_offset the same just in case.
+    code_offset = current_code_item_->insns_size_in_code_units_;
   }
 
   int block_id = (*dex_pc_to_block_map)[code_offset];
@@ -483,6 +487,7 @@
   BasicBlock* taken_block = FindBlock(target, /* create */ true,
                                       /* immed_pred_block_p */ &cur_block,
                                       dex_pc_to_block_map);
+  DCHECK(taken_block != nullptr);
   cur_block->taken = taken_block->id;
   taken_block->predecessors.push_back(cur_block->id);
 
@@ -494,6 +499,7 @@
                                              /* immed_pred_block_p */
                                              &cur_block,
                                              dex_pc_to_block_map);
+    DCHECK(fallthrough_block != nullptr);
     cur_block->fall_through = fallthrough_block->id;
     fallthrough_block->predecessors.push_back(cur_block->id);
   } else if (code_ptr < code_end) {
@@ -508,7 +514,8 @@
                                        ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
   UNUSED(flags);
   const uint16_t* switch_data =
-      reinterpret_cast<const uint16_t*>(GetCurrentInsns() + cur_offset + insn->dalvikInsn.vB);
+      reinterpret_cast<const uint16_t*>(GetCurrentInsns() + cur_offset +
+          static_cast<int32_t>(insn->dalvikInsn.vB));
   int size;
   const int* keyTable;
   const int* target_table;
@@ -561,6 +568,7 @@
     BasicBlock* case_block = FindBlock(cur_offset + target_table[i],  /* create */ true,
                                        /* immed_pred_block_p */ &cur_block,
                                        dex_pc_to_block_map);
+    DCHECK(case_block != nullptr);
     SuccessorBlockInfo* successor_block_info =
         static_cast<SuccessorBlockInfo*>(arena_->Alloc(sizeof(SuccessorBlockInfo),
                                                        kArenaAllocSuccessor));
@@ -576,6 +584,7 @@
   BasicBlock* fallthrough_block = FindBlock(cur_offset +  width, /* create */ true,
                                             /* immed_pred_block_p */ nullptr,
                                             dex_pc_to_block_map);
+  DCHECK(fallthrough_block != nullptr);
   cur_block->fall_through = fallthrough_block->id;
   fallthrough_block->predecessors.push_back(cur_block->id);
   return cur_block;
@@ -709,8 +718,8 @@
   // FindBlock lookup cache.
   ScopedArenaAllocator allocator(&cu_->arena_stack);
   ScopedArenaVector<uint16_t> dex_pc_to_block_map(allocator.Adapter());
-  dex_pc_to_block_map.resize(dex_pc_to_block_map.size() +
-                             current_code_item_->insns_size_in_code_units_);
+  dex_pc_to_block_map.resize(current_code_item_->insns_size_in_code_units_ +
+                             1 /* Fall-through on last insn; dead or punt to interpreter. */);
 
   // TODO: replace with explicit resize routine.  Using automatic extension side effect for now.
   try_block_addr_->SetBit(current_code_item_->insns_size_in_code_units_);
@@ -876,6 +885,20 @@
   if (cu_->verbose) {
     DumpMIRGraph();
   }
+
+  // Check if there's been a fall-through out of the method code.
+  BasicBlockId out_bb_id = dex_pc_to_block_map[current_code_item_->insns_size_in_code_units_];
+  if (UNLIKELY(out_bb_id != NullBasicBlockId)) {
+    // Eagerly calculate DFS order to determine if the block is dead.
+    DCHECK(!DfsOrdersUpToDate());
+    ComputeDFSOrders();
+    BasicBlock* out_bb = GetBasicBlock(out_bb_id);
+    DCHECK(out_bb != nullptr);
+    if (out_bb->block_type != kDead) {
+      LOG(WARNING) << "Live fall-through out of method in " << PrettyMethod(method_idx, dex_file);
+      SetPuntToInterpreter(true);
+    }
+  }
 }
 
 void MIRGraph::ShowOpcodeStats() {
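
Editor's note: the mir_graph.cc change above reserves one extra entry in dex_pc_to_block_map (index insns_size_in_code_units_) so FindBlock can clamp an out-of-range offset to that sentinel slot instead of returning nullptr; InlineMethod() then checks whether a block was recorded there and punts to the interpreter if it turns out to be live. A minimal standalone sketch of the sentinel-slot bookkeeping, with made-up sizes and block ids (toy code, not ART's MIRGraph):

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Toy model: one block id per dex pc, plus a sentinel slot at index
  // `insns_size` for control flow that falls off the end of the method.
  const uint32_t insns_size = 8;  // code units in the hypothetical method
  std::vector<uint16_t> dex_pc_to_block_map(insns_size + 1, 0);  // 0 == no block

  uint32_t code_offset = 9;  // hypothetical branch target past the method end
  if (code_offset >= insns_size) {
    code_offset = insns_size;  // clamp to the sentinel slot instead of failing
  }
  dex_pc_to_block_map[code_offset] = 42;  // record the out-of-method block id

  if (dex_pc_to_block_map[insns_size] != 0) {
    std::printf("fall-through out of method recorded; verify it is dead later\n");
  }
  return 0;
}
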
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 934fa35..8467b71 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -428,7 +428,7 @@
   { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0  | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1, false }, "PextwRRI", "!0r,!1r,!2d" },
   { kX86PextrdRRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0  | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextdRRI", "!0r,!1r,!2d" },
   { kX86PextrbMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextrbMRI", "[!0r+!1d],!2r,!3d" },
-  { kX86PextrwMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextrwMRI", "[!0r+!1d],!2r,!3d" },
+  { kX86PextrwMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x15, 0, 0, 1, false }, "PextrwMRI", "[!0r+!1d],!2r,!3d" },
   { kX86PextrdMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextrdMRI", "[!0r+!1d],!2r,!3d" },
 
   { kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuflwRRI", "!0r,!1r,!2d" },
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index 36339f7..f58f206 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -180,6 +180,13 @@
        RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
 }
 
+TEST_F(QuickAssembleX86LowLevelTest, Pextrw) {
+  Test(kX86, "Pextrw", "pextrw $7, %xmm3, 8(%eax)\n", kX86PextrwMRI,
+       RegStorage::Solo32(r0).GetReg(), 8, RegStorage::Solo128(3).GetReg(), 7);
+  Test(kX86_64, "Pextrw", "pextrw $7, %xmm8, 8(%r10)\n", kX86PextrwMRI,
+       RegStorage::Solo64(r10q).GetReg(), 8, RegStorage::Solo128(8).GetReg(), 7);
+}
+
 class QuickAssembleX86MacroTest : public QuickAssembleX86TestBase {
  protected:
   typedef void (X86Mir2Lir::*AsmFn)(MIR*);
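
Editor's note: the memory form of PEXTRW is the SSE4.1 encoding 66 0F 3A 15 /r ib; the table row previously emitted 0x16, which is the PEXTRD opcode byte, so stored word extracts were mis-assembled. The new test exercises the corrected entry. As a rough cross-check (my own byte math, not part of the patch), the 32-bit test case should assemble as follows:

#include <cstdint>
#include <cstdio>

int main() {
  // Expected bytes for "pextrw $7, %xmm3, 8(%eax)" with the corrected table:
  // 66 0F 3A 15 (SSE4.1 PEXTRW r/m16, xmm, imm8), ModRM 0x58 (mod=01 disp8,
  // reg=xmm3, rm=eax), disp8 0x08, imm8 0x07.
  const uint8_t expected[] = {0x66, 0x0F, 0x3A, 0x15, 0x58, 0x08, 0x07};
  for (uint8_t b : expected) {
    std::printf("%02X ", b);
  }
  std::printf("\n");
  return 0;
}
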
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 6342f91..bdbd571 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -105,6 +105,25 @@
   DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
 };
 
+class InvokeDexCallingConventionVisitor {
+ public:
+  virtual Location GetNextLocation(Primitive::Type type) = 0;
+
+ protected:
+  InvokeDexCallingConventionVisitor() {}
+  virtual ~InvokeDexCallingConventionVisitor() {}
+
+  // The current index for core registers.
+  uint32_t gp_index_ = 0u;
+  // The current index for floating-point registers.
+  uint32_t float_index_ = 0u;
+  // The current stack index.
+  uint32_t stack_index_ = 0u;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+};
+
 class CodeGenerator {
  public:
   // Compiles the graph to executable instructions. Returns whether the compilation
@@ -308,6 +327,7 @@
     return GetFpuSpillSize() + GetCoreSpillSize();
   }
 
+  virtual ParallelMoveResolver* GetMoveResolver() = 0;
 
  protected:
   CodeGenerator(HGraph* graph,
@@ -351,7 +371,6 @@
 
   virtual Location GetStackLocation(HLoadLocal* load) const = 0;
 
-  virtual ParallelMoveResolver* GetMoveResolver() = 0;
   virtual HGraphVisitor* GetLocationBuilder() = 0;
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 159bd30..e4c37de 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -605,7 +605,7 @@
   UNREACHABLE();
 }
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -680,7 +680,7 @@
   return Location();
 }
 
-Location InvokeDexCallingConventionVisitor::GetReturnLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -1288,7 +1288,7 @@
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(Location::RegisterLocation(R0));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
+  InvokeDexCallingConventionVisitorARM calling_convention_visitor;
   for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 6009036..1a498e1 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -78,22 +78,19 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorARM : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor()
-      : gp_index_(0), float_index_(0), double_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorARM() {}
+  virtual ~InvokeDexCallingConventionVisitorARM() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
   Location GetReturnLocation(Primitive::Type type);
 
  private:
   InvokeDexCallingConvention calling_convention;
-  uint32_t gp_index_;
-  uint32_t float_index_;
-  uint32_t double_index_;
-  uint32_t stack_index_;
+  uint32_t double_index_ = 0;
 
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM);
 };
 
 class ParallelMoveResolverARM : public ParallelMoveResolverWithSwap {
@@ -151,7 +148,7 @@
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
   CodeGeneratorARM* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorARM parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM);
 };
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 946ffc8..9e02a1d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -372,15 +372,15 @@
 
 #undef __
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
   Location next_location;
   if (type == Primitive::kPrimVoid) {
     LOG(FATAL) << "Unreachable type " << type;
   }
 
   if (Primitive::IsFloatingPointType(type) &&
-      (fp_index_ < calling_convention.GetNumberOfFpuRegisters())) {
-    next_location = LocationFrom(calling_convention.GetFpuRegisterAt(fp_index_++));
+      (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
+    next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
   } else if (!Primitive::IsFloatingPointType(type) &&
              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
@@ -1907,7 +1907,7 @@
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(LocationFrom(x0));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
+  InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
   for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 5a35867..8aeea54 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -119,25 +119,20 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorARM64() {}
+  virtual ~InvokeDexCallingConventionVisitorARM64() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
   Location GetReturnLocation(Primitive::Type return_type) {
     return calling_convention.GetReturnLocation(return_type);
   }
 
  private:
   InvokeDexCallingConvention calling_convention;
-  // The current index for core registers.
-  uint32_t gp_index_;
-  // The current index for floating-point registers.
-  uint32_t fp_index_;
-  // The current stack index.
-  uint32_t stack_index_;
 
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM64);
 };
 
 class InstructionCodeGeneratorARM64 : public HGraphVisitor {
@@ -193,7 +188,7 @@
   void HandleShift(HBinaryOperation* instr);
 
   CodeGeneratorARM64* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorARM64 parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
 };
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 7df4b53..5ee091f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -551,7 +551,7 @@
   UNREACHABLE();
 }
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -582,7 +582,7 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_++;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -592,7 +592,7 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_ += 2;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -1238,7 +1238,7 @@
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(Location::RegisterLocation(EAX));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
+  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
   for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 8bd3cd3..79dec7a 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -75,22 +75,17 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorX86() {}
+  virtual ~InvokeDexCallingConventionVisitorX86() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
 
  private:
   InvokeDexCallingConvention calling_convention;
-  // The current index for cpu registers.
-  uint32_t gp_index_;
-  // The current index for fpu registers.
-  uint32_t fp_index_;
-  // The current stack index.
-  uint32_t stack_index_;
 
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86);
 };
 
 class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
@@ -137,7 +132,7 @@
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
   CodeGeneratorX86* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorX86 parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
 };
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 37b00c8..5ac6866 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1240,7 +1240,7 @@
   codegen_->GenerateFrameExit();
 }
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -1270,7 +1270,7 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_++;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -1280,7 +1280,7 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_ += 2;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -1338,7 +1338,7 @@
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(Location::RegisterLocation(RDI));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
+  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
   for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index e3fd5d7..13f9c46 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -68,22 +68,17 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorX86_64() {}
+  virtual ~InvokeDexCallingConventionVisitorX86_64() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
 
  private:
   InvokeDexCallingConvention calling_convention;
-  // The current index for cpu registers.
-  uint32_t gp_index_;
-  // The current index for fpu registers.
-  uint32_t fp_index_;
-  // The current stack index.
-  uint32_t stack_index_;
 
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64);
 };
 
 class CodeGeneratorX86_64;
@@ -147,7 +142,7 @@
   void HandleFieldGet(HInstruction* instruction);
 
   CodeGeneratorX86_64* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorX86_64 parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
 };
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 91cd60a..cd427c5 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -65,10 +65,13 @@
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block  = it.Current();
     if (live_blocks.IsBitSet(block->GetBlockId())) {
-      continue;
+      // If this block is part of a loop that is being dismantled, we need to
+      // update its loop information.
+      block->UpdateLoopInformation();
+    } else {
+      MaybeRecordDeadBlock(block);
+      block->DisconnectAndDelete();
     }
-    MaybeRecordDeadBlock(block);
-    block->DisconnectAndDelete();
   }
 
   // Connect successive blocks created by dead branches. Order does not matter.
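
Editor's note: live blocks now call UpdateLoopInformation() (added in nodes.cc further down), so a block whose innermost loop was just dismantled is re-attached to the closest enclosing loop that still has a back edge, or to no loop at all. A stripped-down model of that outward walk, omitting the suspend-check removal (toy types, not ART's HLoopInformation):

#include <cstdio>

struct LoopInfo {
  LoopInfo* outer;     // loop information of the pre-header's block, if any
  bool has_back_edge;  // false once dead code elimination removed the back edge
};

// Walk outward until a loop that is still intact is found (or none remains).
LoopInfo* UpdateLoopInformation(LoopInfo* info) {
  while (info != nullptr && !info->has_back_edge) {
    info = info->outer;
  }
  return info;
}

int main() {
  LoopInfo outer{nullptr, true};
  LoopInfo inner{&outer, false};  // inner loop was dismantled
  LoopInfo* updated = UpdateLoopInformation(&inner);
  std::printf("re-attached to %s loop\n", updated == &outer ? "the outer" : "no");
  return 0;
}
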
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 5d3db5c..43fe374 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -374,4 +374,3 @@
 }
 
 }  // namespace art
-
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index dbb7cba..c243ef3 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -17,8 +17,10 @@
 #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_
 #define ART_COMPILER_OPTIMIZING_INTRINSICS_H_
 
+#include "code_generator.h"
 #include "nodes.h"
 #include "optimization.h"
+#include "parallel_move_resolver.h"
 
 namespace art {
 
@@ -76,6 +78,38 @@
 #undef INTRINSICS_LIST
 #undef OPTIMIZING_INTRINSICS
 
+  static void MoveArguments(HInvoke* invoke,
+                            CodeGenerator* codegen,
+                            InvokeDexCallingConventionVisitor* calling_convention_visitor) {
+    if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) {
+      HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+      // When we do not run baseline, explicit clinit checks triggered by static
+      // invokes must have been pruned by art::PrepareForRegisterAllocation.
+      DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
+    }
+
+    if (invoke->GetNumberOfArguments() == 0) {
+      // No argument to move.
+      return;
+    }
+
+    LocationSummary* locations = invoke->GetLocations();
+
+    // We're moving potentially two or more locations to locations that could overlap, so we need
+    // a parallel move resolver.
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+
+    for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
+      HInstruction* input = invoke->InputAt(i);
+      Location cc_loc = calling_convention_visitor->GetNextLocation(input->GetType());
+      Location actual_loc = locations->InAt(i);
+
+      parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
+    }
+
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+  }
+
  protected:
   IntrinsicVisitor() {}
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index e3fa272..7f7b450 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -77,28 +77,9 @@
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) {
-  if (invoke->GetNumberOfArguments() == 0) {
-    // No argument to move.
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorARM* codegen) {
+  InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -117,7 +98,7 @@
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index d71b49e..ca3de99 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -86,28 +86,9 @@
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM64* codegen) {
-  if (invoke->GetNumberOfArguments() == 0) {
-    // No argument to move.
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) {
+  InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -126,7 +107,7 @@
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 18fb3c4..1eef1ef 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -111,28 +111,9 @@
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) {
-  if (invoke->GetNumberOfArguments() == 0) {
-    // No argument to move.
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
+  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -155,7 +136,7 @@
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX);
@@ -749,7 +730,7 @@
 }
 
 static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
-  MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+  MoveArguments(invoke, codegen);
 
   DCHECK(invoke->IsInvokeStaticOrDirect());
   codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index db7b58b..1fc5432 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -103,28 +103,9 @@
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
-  if (invoke->GetNumberOfArguments() == 0) {
-    // No argument to move.
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -143,7 +124,7 @@
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
@@ -623,7 +604,7 @@
 }
 
 static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
-  MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+  MoveArguments(invoke, codegen);
 
   DCHECK(invoke->IsInvokeStaticOrDirect());
   codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index e2eb46a..f07f4c7 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -376,7 +376,6 @@
 }
 
 HBasicBlock* HLoopInformation::GetPreHeader() const {
-  DCHECK_EQ(header_->GetPredecessors().Size(), 2u);
   return header_->GetDominator();
 }
 
@@ -449,6 +448,20 @@
   instructions_.InsertInstructionBefore(instruction, cursor);
 }
 
+void HBasicBlock::InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor) {
+  DCHECK(!cursor->IsPhi());
+  DCHECK(!instruction->IsPhi());
+  DCHECK_EQ(instruction->GetId(), -1);
+  DCHECK_NE(cursor->GetId(), -1);
+  DCHECK_EQ(cursor->GetBlock(), this);
+  DCHECK(!instruction->IsControlFlow());
+  DCHECK(!cursor->IsControlFlow());
+  instruction->SetBlock(this);
+  instruction->SetId(GetGraph()->GetNextInstructionId());
+  UpdateInputsUsers(instruction);
+  instructions_.InsertInstructionAfter(instruction, cursor);
+}
+
 void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) {
   DCHECK_EQ(phi->GetId(), -1);
   DCHECK_NE(cursor->GetId(), -1);
@@ -1024,6 +1037,20 @@
   SetGraph(nullptr);
 }
 
+void HBasicBlock::UpdateLoopInformation() {
+  // Check if loop information points to a dismantled loop. If so, replace with
+  // the loop information of a larger loop which contains this block, or nullptr
+  // otherwise. We iterate in case the larger loop has been destroyed too.
+  while (IsInLoop() && loop_information_->GetBackEdges().IsEmpty()) {
+    if (IsLoopHeader()) {
+      HSuspendCheck* suspend_check = loop_information_->GetSuspendCheck();
+      DCHECK_EQ(suspend_check->GetBlock(), this);
+      RemoveInstruction(suspend_check);
+    }
+    loop_information_ = loop_information_->GetPreHeader()->GetLoopInformation();
+  }
+}
+
 void HBasicBlock::MergeWith(HBasicBlock* other) {
   DCHECK_EQ(GetGraph(), other->GetGraph());
   DCHECK(GetDominatedBlocks().Contains(other));
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index f64086e..50eecff 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -397,6 +397,11 @@
     return back_edges_;
   }
 
+  HBasicBlock* GetSingleBackEdge() const {
+    DCHECK_EQ(back_edges_.Size(), 1u);
+    return back_edges_.Get(0);
+  }
+
   void ClearBackEdges() {
     back_edges_.Reset();
   }
@@ -620,7 +625,9 @@
   void DisconnectAndDelete();
 
   void AddInstruction(HInstruction* instruction);
+  // Insert `instruction` before/after an existing instruction `cursor`.
   void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
+  void InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor);
   // Replace instruction `initial` with `replacement` within this block.
   void ReplaceAndRemoveInstructionWith(HInstruction* initial,
                                        HInstruction* replacement);
@@ -634,7 +641,7 @@
   void RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_safety = true);
 
   bool IsLoopHeader() const {
-    return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this);
+    return IsInLoop() && (loop_information_->GetHeader() == this);
   }
 
   bool IsLoopPreHeaderFirstPredecessor() const {
@@ -653,7 +660,7 @@
   void SetInLoop(HLoopInformation* info) {
     if (IsLoopHeader()) {
       // Nothing to do. This just means `info` is an outer loop.
-    } else if (loop_information_ == nullptr) {
+    } else if (!IsInLoop()) {
       loop_information_ = info;
     } else if (loop_information_->Contains(*info->GetHeader())) {
       // Block is currently part of an outer loop. Make it part of this inner loop.
@@ -672,6 +679,11 @@
     loop_information_ = info;
   }
 
+  // Checks if the loop information points to a valid loop. If the loop has been
+  // dismantled (does not have a back edge any more), loop information is
+  // removed or replaced with the information of the first valid outer loop.
+  void UpdateLoopInformation();
+
   bool IsInLoop() const { return loop_information_ != nullptr; }
 
   // Returns whether this block dominates the block passed as parameter.
@@ -725,7 +737,7 @@
 
   void Advance() {
     DCHECK(!Done());
-    current_ = current_->GetHeader()->GetDominator()->GetLoopInformation();
+    current_ = current_->GetPreHeader()->GetLoopInformation();
   }
 
   HLoopInformation* Current() const {
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index a8d006f..812642b 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1467,23 +1467,28 @@
 
     LiveRange* range = current->GetFirstRange();
     while (range != nullptr) {
-      DCHECK(use == nullptr || use->GetPosition() >= range->GetStart());
+      while (use != nullptr && use->GetPosition() < range->GetStart()) {
+        DCHECK(use->IsSynthesized());
+        use = use->GetNext();
+      }
       while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
         DCHECK(!use->GetIsEnvironment());
         DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
-        LocationSummary* locations = use->GetUser()->GetLocations();
-        Location expected_location = locations->InAt(use->GetInputIndex());
-        // The expected (actual) location may be invalid in case the input is unused. Currently
-        // this only happens for intrinsics.
-        if (expected_location.IsValid()) {
-          if (expected_location.IsUnallocated()) {
-            locations->SetInAt(use->GetInputIndex(), source);
-          } else if (!expected_location.IsConstant()) {
-            AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
+        if (!use->IsSynthesized()) {
+          LocationSummary* locations = use->GetUser()->GetLocations();
+          Location expected_location = locations->InAt(use->GetInputIndex());
+          // The expected (actual) location may be invalid in case the input is unused. Currently
+          // this only happens for intrinsics.
+          if (expected_location.IsValid()) {
+            if (expected_location.IsUnallocated()) {
+              locations->SetInAt(use->GetInputIndex(), source);
+            } else if (!expected_location.IsConstant()) {
+              AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
+            }
+          } else {
+            DCHECK(use->GetUser()->IsInvoke());
+            DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
           }
-        } else {
-          DCHECK(use->GetUser()->IsInvoke());
-          DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
         }
         use = use->GetNext();
       }
@@ -1561,7 +1566,13 @@
     current = next_sibling;
   } while (current != nullptr);
 
-  DCHECK(use == nullptr);
+  if (kIsDebugBuild) {
+    // Following uses can only be synthesized uses.
+    while (use != nullptr) {
+      DCHECK(use->IsSynthesized());
+      use = use->GetNext();
+    }
+  }
 }
 
 void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index b674f74..0bbcb30 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -341,7 +341,7 @@
   size_t end = GetEnd();
   while (use != nullptr && use->GetPosition() <= end) {
     size_t use_position = use->GetPosition();
-    if (use_position >= start) {
+    if (use_position >= start && !use->IsSynthesized()) {
       HInstruction* user = use->GetUser();
       size_t input_index = use->GetInputIndex();
       if (user->IsPhi()) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index b95276a..b74e655 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -112,12 +112,15 @@
         is_environment_(is_environment),
         position_(position),
         next_(next) {
-    DCHECK(user->IsPhi()
+    DCHECK((user == nullptr)
+        || user->IsPhi()
         || (GetPosition() == user->GetLifetimePosition() + 1)
         || (GetPosition() == user->GetLifetimePosition()));
     DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
   }
 
+  static constexpr size_t kNoInput = -1;
+
   size_t GetPosition() const { return position_; }
 
   UsePosition* GetNext() const { return next_; }
@@ -126,14 +129,16 @@
   HInstruction* GetUser() const { return user_; }
 
   bool GetIsEnvironment() const { return is_environment_; }
+  bool IsSynthesized() const { return user_ == nullptr; }
 
   size_t GetInputIndex() const { return input_index_; }
 
   void Dump(std::ostream& stream) const {
     stream << position_;
-    if (is_environment_) {
-      stream << " (env)";
-    }
+  }
+
+  HLoopInformation* GetLoopInformation() const {
+    return user_->GetBlock()->GetLoopInformation();
   }
 
   UsePosition* Dup(ArenaAllocator* allocator) const {
@@ -142,6 +147,15 @@
         next_ == nullptr ? nullptr : next_->Dup(allocator));
   }
 
+  bool RequiresRegister() const {
+    if (GetIsEnvironment()) return false;
+    if (IsSynthesized()) return false;
+    Location location = GetUser()->GetLocations()->InAt(GetInputIndex());
+    return location.IsUnallocated()
+        && (location.GetPolicy() == Location::kRequiresRegister
+            || location.GetPolicy() == Location::kRequiresFpuRegister);
+  }
+
  private:
   HInstruction* const user_;
   const size_t input_index_;
@@ -240,9 +254,15 @@
         // location of the input just before that instruction (and not potential moves due
         // to splitting).
         position = instruction->GetLifetimePosition();
+      } else if (!locations->InAt(input_index).IsValid()) {
+        return;
       }
     }
 
+    if (!is_environment && instruction->IsInLoop()) {
+      AddBackEdgeUses(*instruction->GetBlock());
+    }
+
     DCHECK(position == instruction->GetLifetimePosition()
            || position == instruction->GetLifetimePosition() + 1);
 
@@ -306,6 +326,9 @@
 
   void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) {
     DCHECK(instruction->IsPhi());
+    if (block->IsInLoop()) {
+      AddBackEdgeUses(*block);
+    }
     first_use_ = new (allocator_) UsePosition(
         instruction, input_index, false, block->GetLifetimeEnd(), first_use_);
   }
@@ -456,27 +479,9 @@
     if (is_temp_) {
       return position == GetStart() ? position : kNoLifetime;
     }
-    if (position == GetStart() && IsParent()) {
-      LocationSummary* locations = defined_by_->GetLocations();
-      Location location = locations->Out();
-      // This interval is the first interval of the instruction. If the output
-      // of the instruction requires a register, we return the position of that instruction
-      // as the first register use.
-      if (location.IsUnallocated()) {
-        if ((location.GetPolicy() == Location::kRequiresRegister)
-             || (location.GetPolicy() == Location::kSameAsFirstInput
-                 && (locations->InAt(0).IsRegister()
-                     || locations->InAt(0).IsRegisterPair()
-                     || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
-          return position;
-        } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
-                   || (location.GetPolicy() == Location::kSameAsFirstInput
-                       && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) {
-          return position;
-        }
-      } else if (location.IsRegister() || location.IsRegisterPair()) {
-        return position;
-      }
+
+    if (IsDefiningPosition(position) && DefinitionRequiresRegister()) {
+      return position;
     }
 
     UsePosition* use = first_use_;
@@ -484,10 +489,7 @@
     while (use != nullptr && use->GetPosition() <= end) {
       size_t use_position = use->GetPosition();
       if (use_position > position) {
-        Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
-        if (location.IsUnallocated()
-            && (location.GetPolicy() == Location::kRequiresRegister
-                || location.GetPolicy() == Location::kRequiresFpuRegister)) {
+        if (use->RequiresRegister()) {
           return use_position;
         }
       }
@@ -505,18 +507,16 @@
       return position == GetStart() ? position : kNoLifetime;
     }
 
-    if (position == GetStart() && IsParent()) {
-      if (defined_by_->GetLocations()->Out().IsValid()) {
-        return position;
-      }
+    if (IsDefiningPosition(position)) {
+      DCHECK(defined_by_->GetLocations()->Out().IsValid());
+      return position;
     }
 
     UsePosition* use = first_use_;
     size_t end = GetEnd();
     while (use != nullptr && use->GetPosition() <= end) {
-      Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
       size_t use_position = use->GetPosition();
-      if (use_position > position && location.IsValid()) {
+      if (use_position > position) {
         return use_position;
       }
       use = use->GetNext();
@@ -664,7 +664,7 @@
         stream << " ";
       } while ((use = use->GetNext()) != nullptr);
     }
-    stream << "}, {";
+    stream << "}, { ";
     use = first_env_use_;
     if (use != nullptr) {
       do {
@@ -910,6 +910,100 @@
     return range;
   }
 
+  bool DefinitionRequiresRegister() const {
+    DCHECK(IsParent());
+    LocationSummary* locations = defined_by_->GetLocations();
+    Location location = locations->Out();
+    // This interval is the first interval of the instruction. If the output
+    // of the instruction requires a register, the defining position counts as
+    // the first register use.
+    if (location.IsUnallocated()) {
+      if ((location.GetPolicy() == Location::kRequiresRegister)
+           || (location.GetPolicy() == Location::kSameAsFirstInput
+               && (locations->InAt(0).IsRegister()
+                   || locations->InAt(0).IsRegisterPair()
+                   || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
+        return true;
+      } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
+                 || (location.GetPolicy() == Location::kSameAsFirstInput
+                     && (locations->InAt(0).IsFpuRegister()
+                         || locations->InAt(0).IsFpuRegisterPair()
+                         || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
+        return true;
+      }
+    } else if (location.IsRegister() || location.IsRegisterPair()) {
+      return true;
+    }
+    return false;
+  }
+
+  bool IsDefiningPosition(size_t position) const {
+    return IsParent() && (position == GetStart());
+  }
+
+  bool HasSynthesizeUseAt(size_t position) const {
+    UsePosition* use = first_use_;
+    while (use != nullptr) {
+      size_t use_position = use->GetPosition();
+      if ((use_position == position) && use->IsSynthesized()) {
+        return true;
+      }
+      if (use_position > position) break;
+      use = use->GetNext();
+    }
+    return false;
+  }
+
+  void AddBackEdgeUses(const HBasicBlock& block_at_use) {
+    DCHECK(block_at_use.IsInLoop());
+    // Add synthesized uses at the back edge of loops to help the register allocator.
+    // Note that this method is called in decreasing liveness order, to facilitate adding
+    // uses at the head of the `first_use_` linked list. Because below
+    // we iterate from inner-most to outer-most, which is in increasing liveness order,
+    // we need to take extra care of how the `first_use_` linked list is being updated.
+    UsePosition* first_in_new_list = nullptr;
+    UsePosition* last_in_new_list = nullptr;
+    for (HLoopInformationOutwardIterator it(block_at_use);
+         !it.Done();
+         it.Advance()) {
+      HLoopInformation* current = it.Current();
+      if (GetDefinedBy()->GetLifetimePosition() >= current->GetHeader()->GetLifetimeStart()) {
+        // This interval is defined in the loop. We can stop going outward.
+        break;
+      }
+
+      size_t back_edge_use_position = current->GetSingleBackEdge()->GetLifetimeEnd();
+      if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) {
+        // There was a use already seen in this loop. Therefore the previous call to `AddUse`
+        // already inserted the backedge use. We can stop going outward.
+        DCHECK(HasSynthesizeUseAt(back_edge_use_position));
+        break;
+      }
+
+      DCHECK(last_in_new_list == nullptr
+             || back_edge_use_position > last_in_new_list->GetPosition());
+
+      UsePosition* new_use = new (allocator_) UsePosition(
+          nullptr, UsePosition::kNoInput, /* is_environment */ false,
+          back_edge_use_position, nullptr);
+
+      if (last_in_new_list != nullptr) {
+        // Going outward. The latest created use needs to point to the new use.
+        last_in_new_list->SetNext(new_use);
+      } else {
+        // This is the inner-most loop.
+        DCHECK_EQ(current, block_at_use.GetLoopInformation());
+        first_in_new_list = new_use;
+      }
+      last_in_new_list = new_use;
+    }
+    // Link the newly created linked list with `first_use_`.
+    if (last_in_new_list != nullptr) {
+      last_in_new_list->SetNext(first_use_);
+      first_use_ = first_in_new_list;
+    }
+  }
+
   ArenaAllocator* const allocator_;
 
   // Ranges of this interval. We need a quick access to the last range to test
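
Editor's note: AddBackEdgeUses records user-less ("synthesized") UsePositions at the back edge of each enclosing loop in which a value is used but not defined, so the register allocator keeps the value live across the whole loop rather than ending the interval at its last real use. A simplified sketch of that idea with made-up positions (not ART's UsePosition machinery, which builds a prepend-only linked list in decreasing liveness order):

#include <algorithm>
#include <cstdio>
#include <vector>

struct Use {
  int position;
  bool synthesized;  // no real user instruction, only keeps the interval live
};

// If the value has a real use inside the loop but nothing at or beyond the
// back edge, add a synthesized use at the back edge's end position.
void AddBackEdgeUse(std::vector<Use>* uses, int loop_header_start, int back_edge_end) {
  bool used_in_loop = std::any_of(uses->begin(), uses->end(), [&](const Use& u) {
    return u.position >= loop_header_start && u.position <= back_edge_end;
  });
  bool already_covered = std::any_of(uses->begin(), uses->end(), [&](const Use& u) {
    return u.position >= back_edge_end;
  });
  if (used_in_loop && !already_covered) {
    uses->push_back(Use{back_edge_end, /*synthesized=*/true});
  }
}

int main() {
  std::vector<Use> uses = {{14, false}};            // one real use inside the loop
  AddBackEdgeUse(&uses, /*loop_header_start=*/10, /*back_edge_end=*/22);
  for (const Use& u : uses) {
    std::printf("use at %d%s\n", u.position, u.synthesized ? " (synthesized)" : "");
  }
  return 0;
}
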
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index b764095..8490afb 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <inttypes.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/stat.h>
@@ -324,26 +325,19 @@
     return nullptr;
   }
 
-  static void Message(char severity, const std::string& message) {
-    // TODO: Remove when we switch to LOG when we can guarantee it won't prevent shutdown in error
-    //       cases.
-    fprintf(stderr, "dex2oat%s %c %d %d %s\n",
-            kIsDebugBuild ? "d" : "",
-            severity,
-            getpid(),
-            GetTid(),
-            message.c_str());
-  }
-
   NO_RETURN static void Fatal(const std::string& message) {
-    Message('F', message);
+    // TODO: When we can guarantee it won't prevent shutdown in error cases, move to LOG. However,
+    //       it's rather easy to hang in unwinding.
+    //       LogLine also avoids ART logging lock issues, as it's really only a wrapper around
+    //       logcat logging or stderr output.
+    LogMessage::LogLine(__FILE__, __LINE__, LogSeverity::FATAL, message.c_str());
     exit(1);
   }
 
   void Wait() {
     // TODO: tune the multiplier for GC verification, the following is just to make the timeout
     //       large.
-    int64_t multiplier = kVerifyObjectSupport > kVerifyObjectModeFast ? 100 : 1;
+    constexpr int64_t multiplier = kVerifyObjectSupport > kVerifyObjectModeFast ? 100 : 1;
     timespec timeout_ts;
     InitTimeSpec(true, CLOCK_REALTIME, multiplier * kWatchDogTimeoutSeconds * 1000, 0, &timeout_ts);
     const char* reason = "dex2oat watch dog thread waiting";
@@ -351,7 +345,8 @@
     while (!shutting_down_) {
       int rc = TEMP_FAILURE_RETRY(pthread_cond_timedwait(&cond_, &mutex_, &timeout_ts));
       if (rc == ETIMEDOUT) {
-        Fatal(StringPrintf("dex2oat did not finish after %d seconds", kWatchDogTimeoutSeconds));
+        Fatal(StringPrintf("dex2oat did not finish after %" PRId64 " seconds",
+                           kWatchDogTimeoutSeconds));
       } else if (rc != 0) {
         std::string message(StringPrintf("pthread_cond_timedwait failed: %s",
                                          strerror(errno)));
@@ -363,10 +358,10 @@
 
   // When setting timeouts, keep in mind that the build server may not be as fast as your desktop.
   // Debug builds are slower so they have larger timeouts.
-  static const unsigned int kSlowdownFactor = kIsDebugBuild ? 5U : 1U;
+  static constexpr int64_t kSlowdownFactor = kIsDebugBuild ? 5U : 1U;
 
-  // 6 minutes scaled by kSlowdownFactor.
-  static const unsigned int kWatchDogTimeoutSeconds = kSlowdownFactor * 6 * 60;
+  // 10 minutes scaled by kSlowdownFactor.
+  static constexpr int64_t kWatchDogTimeoutSeconds = kSlowdownFactor * 10 * 60;
 
   bool is_watch_dog_enabled_;
   bool shutting_down_;
@@ -1806,8 +1801,6 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(Dex2Oat);
 };
 
-const unsigned int WatchDog::kWatchDogTimeoutSeconds;
-
 static void b13564922() {
 #if defined(__linux__) && defined(__arm__)
   int major, minor;
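
Editor's note: kWatchDogTimeoutSeconds is now a constexpr int64_t (and grew from 6 to 10 minutes), so the watchdog message switches from %d to the PRId64 macro, which is why <inttypes.h> is included at the top of dex2oat.cc. A minimal standalone example of that format usage (kSlowdownFactor omitted here for brevity):

#include <cinttypes>
#include <cstdio>

int main() {
  constexpr int64_t kWatchDogTimeoutSeconds = 10 * 60;  // value mirrors the diff above
  std::printf("dex2oat did not finish after %" PRId64 " seconds\n", kWatchDogTimeoutSeconds);
  return 0;
}
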
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index ba0c0bd..2ead4a2 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -587,6 +587,14 @@
               src_reg_file = SSE;
               immediate_bytes = 1;
               break;
+            case 0x15:
+              opcode1 = "pextrw";
+              prefix[2] = 0;
+              has_modrm = true;
+              store = true;
+              src_reg_file = SSE;
+              immediate_bytes = 1;
+              break;
             case 0x16:
               opcode1 = "pextrd";
               prefix[2] = 0;
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 240799e..ece9d4b 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -468,7 +468,7 @@
   ifeq ($$(art_target_or_host),target)
     LOCAL_SHARED_LIBRARIES += libdl
     # ZipArchive support, the order matters here to get all symbols.
-    LOCAL_STATIC_LIBRARIES := libziparchive libz
+    LOCAL_STATIC_LIBRARIES := libziparchive libz libbase
     # For android::FileMap used by libziparchive.
     LOCAL_SHARED_LIBRARIES += libutils
     # For liblog, atrace, properties, ashmem, set_sched_policy and socket_peer_is_trusted.
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 962e821..b099088 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -82,10 +82,6 @@
 
 static constexpr bool kSanityCheckObjects = kIsDebugBuild;
 
-// Do a simple class redefinition check in OpenDexFilesFromOat. This is a conservative check to
-// avoid problems with compile-time class-path != runtime class-path.
-static constexpr bool kCheckForDexCollisions = true;
-
 static void ThrowNoClassDefFoundError(const char* fmt, ...)
     __attribute__((__format__(__printf__, 1, 2)))
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -743,6 +739,8 @@
     const char* rhsDescriptor = rhs.cached_descriptor_;
     int cmp = strcmp(lhsDescriptor, rhsDescriptor);
     if (cmp != 0) {
+      // Note that the order must be reversed. We want to iterate over the classes in dex files.
+      // They are sorted lexicographically. Thus, the priority-queue must be a min-queue.
       return cmp > 0;
     }
     return dex_file_ < rhs.dex_file_;
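
Editor's note: std::priority_queue is a max-heap by default, so the comparator above returns cmp > 0 to invert the ordering and pop the lexicographically smallest class descriptor first. A minimal illustration of the same trick using plain strings instead of DexFileAndClassPair:

#include <functional>
#include <iostream>
#include <queue>
#include <string>
#include <vector>

int main() {
  // std::greater turns the default max-heap into a min-heap, just as the
  // reversed strcmp result does for DexFileAndClassPair.
  std::priority_queue<std::string, std::vector<std::string>, std::greater<std::string>> queue;
  queue.push("Lcom/example/B;");
  queue.push("Lcom/example/A;");
  queue.push("Lcom/example/C;");
  while (!queue.empty()) {
    std::cout << queue.top() << "\n";  // prints A, then B, then C
    queue.pop();
  }
  return 0;
}
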
@@ -768,6 +766,11 @@
     return dex_file_;
   }
 
+  void DeleteDexFile() {
+    delete dex_file_;
+    dex_file_ = nullptr;
+  }
+
  private:
   static const char* GetClassDescriptor(const DexFile* dex_file, size_t index) {
     const DexFile::ClassDef& class_def = dex_file->GetClassDef(static_cast<uint16_t>(index));
@@ -799,13 +802,13 @@
   }
 }
 
-static void AddNext(const DexFileAndClassPair& original,
+static void AddNext(DexFileAndClassPair* original,
                     std::priority_queue<DexFileAndClassPair>* heap) {
-  if (original.DexFileHasMoreClasses()) {
-    heap->push(original.GetNext());
+  if (original->DexFileHasMoreClasses()) {
+    heap->push(original->GetNext());
   } else {
     // Need to delete the dex file.
-    delete original.GetDexFile();
+    original->DeleteDexFile();
   }
 }
 
@@ -824,19 +827,17 @@
 // the two elements agree on whether their dex file was from an already-loaded oat-file or the
 // new oat file. Any disagreement indicates a collision.
 bool ClassLinker::HasCollisions(const OatFile* oat_file, std::string* error_msg) {
-  if (!kCheckForDexCollisions) {
-    return false;
-  }
-
   // Dex files are registered late - once a class is actually being loaded. We have to compare
-  // against the open oat files.
+  // against the open oat files. Take the dex_lock_ that protects oat_files_ accesses.
   ReaderMutexLock mu(Thread::Current(), dex_lock_);
 
-  std::priority_queue<DexFileAndClassPair> heap;
+  std::priority_queue<DexFileAndClassPair> queue;
 
   // Add dex files from already loaded oat files, but skip boot.
   {
-    // To grab the boot oat, look at the dex files in the boot classpath.
+    // To grab the boot oat, look at the dex files in the boot classpath. Any of those is fine, as
+    // they were all compiled into the same oat file. So grab the first one, which is guaranteed to
+    // exist if the boot class-path isn't empty.
     const OatFile* boot_oat = nullptr;
     if (!boot_class_path_.empty()) {
       const DexFile* boot_dex_file = boot_class_path_[0];
@@ -850,26 +851,26 @@
       if (loaded_oat_file == boot_oat) {
         continue;
       }
-      AddDexFilesFromOat(loaded_oat_file, true, &heap);
+      AddDexFilesFromOat(loaded_oat_file, true, &queue);
     }
   }
 
-  if (heap.empty()) {
+  if (queue.empty()) {
     // No other oat files, return early.
     return false;
   }
 
   // Add dex files from the oat file to check.
-  AddDexFilesFromOat(oat_file, false, &heap);
+  AddDexFilesFromOat(oat_file, false, &queue);
 
-  // Now drain the heap.
-  while (!heap.empty()) {
-    DexFileAndClassPair compare_pop = heap.top();
-    heap.pop();
+  // Now drain the queue.
+  while (!queue.empty()) {
+    DexFileAndClassPair compare_pop = queue.top();
+    queue.pop();
 
     // Compare against the following elements.
-    while (!heap.empty()) {
-      DexFileAndClassPair top = heap.top();
+    while (!queue.empty()) {
+      DexFileAndClassPair top = queue.top();
 
       if (strcmp(compare_pop.GetCachedDescriptor(), top.GetCachedDescriptor()) == 0) {
         // Same descriptor. Check whether it's crossing old-oat-files to new-oat-files.
@@ -879,18 +880,18 @@
                            compare_pop.GetCachedDescriptor(),
                            compare_pop.GetDexFile()->GetLocation().c_str(),
                            top.GetDexFile()->GetLocation().c_str());
-          FreeDexFilesInHeap(&heap);
+          FreeDexFilesInHeap(&queue);
           return true;
         }
         // Pop it.
-        heap.pop();
-        AddNext(top, &heap);
+        queue.pop();
+        AddNext(&top, &queue);
       } else {
         // Something else. Done here.
         break;
       }
     }
-    AddNext(compare_pop, &heap);
+    AddNext(&compare_pop, &queue);
   }
 
   return false;
@@ -941,11 +942,10 @@
     // Get the oat file on disk.
     std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
     if (oat_file.get() != nullptr) {
-      // Take the file only if it has no collisions.
-      if (!HasCollisions(oat_file.get(), &error_msg)) {
-        source_oat_file = oat_file.release();
-        RegisterOatFile(source_oat_file);
-      } else {
+      // Take the file only if it has no collisions, or we must take it because of preopting.
+      bool accept_oat_file = !HasCollisions(oat_file.get(), &error_msg);
+      if (!accept_oat_file) {
+        // Failed the collision check. Print warning.
         if (Runtime::Current()->IsDexFileFallbackEnabled()) {
           LOG(WARNING) << "Found duplicate classes, falling back to interpreter mode for "
                        << dex_location;
@@ -954,6 +954,19 @@
                           " load classes for " << dex_location;
         }
         LOG(WARNING) << error_msg;
+
+        // However, if the app was part of /system and preopted, there is no original dex file
+        // available. In that case grudgingly accept the oat file.
+        if (!DexFile::MaybeDex(dex_location)) {
+          accept_oat_file = true;
+          LOG(WARNING) << "Dex location " << dex_location << " does not seem to include dex file. "
+                       << "Allow oat file use. This is potentially dangerous.";
+        }
+      }
+
+      if (accept_oat_file) {
+        source_oat_file = oat_file.release();
+        RegisterOatFile(source_oat_file);
       }
     }
   }
@@ -975,8 +988,7 @@
     if (Runtime::Current()->IsDexFileFallbackEnabled()) {
       if (!DexFile::Open(dex_location, dex_location, &error_msg, &dex_files)) {
         LOG(WARNING) << error_msg;
-        error_msgs->push_back("Failed to open dex files from "
-            + std::string(dex_location));
+        error_msgs->push_back("Failed to open dex files from " + std::string(dex_location));
       }
     } else {
       error_msgs->push_back("Fallback mode disabled, skipping dex files.");
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 0589cdd..dfe5a04 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -153,6 +153,31 @@
   return false;
 }
 
+static bool ContainsClassesDex(int fd, const char* filename) {
+  std::string error_msg;
+  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, filename, &error_msg));
+  if (zip_archive.get() == nullptr) {
+    return false;
+  }
+  std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(DexFile::kClassesDex, &error_msg));
+  return (zip_entry.get() != nullptr);
+}
+
+bool DexFile::MaybeDex(const char* filename) {
+  uint32_t magic;
+  std::string error_msg;
+  ScopedFd fd(OpenAndReadMagic(filename, &magic, &error_msg));
+  if (fd.get() == -1) {
+    return false;
+  }
+  if (IsZipMagic(magic)) {
+    return ContainsClassesDex(fd.release(), filename);
+  } else if (IsDexMagic(magic)) {
+    return true;
+  }
+  return false;
+}
+
 int DexFile::GetPermissions() const {
   if (mem_map_.get() == nullptr) {
     return 0;
@@ -296,6 +321,12 @@
   return dex_file;
 }
 
+// Technically there is no limit on the number of dex files that can be in a multidex APK.
+// However, it's bad practice, as each dex file requires its own tables for symbols
+// (types, classes, methods, ...) and dex caches. So warn the user when we open a zip with
+// what seems an excessive number of dex files.
+static constexpr size_t kWarnOnManyDexFilesThreshold = 100;
+
 bool DexFile::OpenFromZip(const ZipArchive& zip_archive, const std::string& location,
                           std::string* error_msg,
                           std::vector<std::unique_ptr<const DexFile>>* dex_files) {
@@ -310,14 +341,13 @@
     dex_files->push_back(std::move(dex_file));
 
     // Now try some more.
-    size_t i = 2;
 
     // We could try to avoid std::string allocations by working on a char array directly. As we
     // do not expect a lot of iterations, this seems too involved and brittle.
 
-    while (i < 100) {
-      std::string name = StringPrintf("classes%zu.dex", i);
-      std::string fake_location = location + kMultiDexSeparator + name;
+    for (size_t i = 1; ; ++i) {
+      std::string name = GetMultiDexClassesDexName(i);
+      std::string fake_location = GetMultiDexLocation(i, location.c_str());
       std::unique_ptr<const DexFile> next_dex_file(Open(zip_archive, name.c_str(), fake_location,
                                                         error_msg, &error_code));
       if (next_dex_file.get() == nullptr) {
@@ -329,7 +359,16 @@
         dex_files->push_back(std::move(next_dex_file));
       }
 
-      i++;
+      if (i == kWarnOnManyDexFilesThreshold) {
+        LOG(WARNING) << location << " has in excess of " << kWarnOnManyDexFilesThreshold
+                     << " dex files. Please consider coalescing and shrinking the number to "
+                        " avoid runtime overhead.";
+      }
+
+      if (i == std::numeric_limits<size_t>::max()) {
+        LOG(ERROR) << "Overflow in number of dex files!";
+        break;
+      }
     }
 
     return true;
@@ -973,11 +1012,19 @@
   return strrchr(location, kMultiDexSeparator) != nullptr;
 }
 
-std::string DexFile::GetMultiDexClassesDexName(size_t number, const char* dex_location) {
-  if (number == 0) {
+std::string DexFile::GetMultiDexClassesDexName(size_t index) {
+  if (index == 0) {
+    return "classes.dex";
+  } else {
+    return StringPrintf("classes%zu.dex", index + 1);
+  }
+}
+
+std::string DexFile::GetMultiDexLocation(size_t index, const char* dex_location) {
+  if (index == 0) {
     return dex_location;
   } else {
-    return StringPrintf("%s" kMultiDexSeparatorString "classes%zu.dex", dex_location, number + 1);
+    return StringPrintf("%s" kMultiDexSeparatorString "classes%zu.dex", dex_location, index + 1);
   }
 }
 
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 0d07358..84eaa4a 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -388,6 +388,10 @@
   static bool Open(const char* filename, const char* location, std::string* error_msg,
                    std::vector<std::unique_ptr<const DexFile>>* dex_files);
 
+  // Checks whether the given file has the dex magic, or is a zip file with a classes.dex entry.
+  // If this function returns false, Open will not succeed. The inverse is not true, however.
+  static bool MaybeDex(const char* filename);
+
   // Opens .dex file, backed by existing memory
   static std::unique_ptr<const DexFile> Open(const uint8_t* base, size_t size,
                                              const std::string& location,
@@ -888,7 +892,13 @@
     return size_;
   }
 
-  static std::string GetMultiDexClassesDexName(size_t number, const char* dex_location);
+  // Return the name of the index-th classes.dex in a multidex zip file. This is classes.dex for
+  // index == 0, and classes{index + 1}.dex otherwise.
+  static std::string GetMultiDexClassesDexName(size_t index);
+
+  // Return the (possibly synthetic) dex location for a multidex entry. This is dex_location for
+  // index == 0, otherwise dex_location + multi-dex-separator + GetMultiDexClassesDexName(index).
+  static std::string GetMultiDexLocation(size_t index, const char* dex_location);
 
   // Returns the canonical form of the given dex location.
   //
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 4d099e1..90b35a3 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -350,11 +350,20 @@
 }
 
 TEST_F(DexFileTest, GetMultiDexClassesDexName) {
+  ASSERT_EQ("classes.dex", DexFile::GetMultiDexClassesDexName(0));
+  ASSERT_EQ("classes2.dex", DexFile::GetMultiDexClassesDexName(1));
+  ASSERT_EQ("classes3.dex", DexFile::GetMultiDexClassesDexName(2));
+  ASSERT_EQ("classes100.dex", DexFile::GetMultiDexClassesDexName(99));
+}
+
+TEST_F(DexFileTest, GetMultiDexLocation) {
   std::string dex_location_str = "/system/app/framework.jar";
   const char* dex_location = dex_location_str.c_str();
-  ASSERT_EQ("/system/app/framework.jar", DexFile::GetMultiDexClassesDexName(0, dex_location));
-  ASSERT_EQ("/system/app/framework.jar:classes2.dex", DexFile::GetMultiDexClassesDexName(1, dex_location));
-  ASSERT_EQ("/system/app/framework.jar:classes101.dex", DexFile::GetMultiDexClassesDexName(100, dex_location));
+  ASSERT_EQ("/system/app/framework.jar", DexFile::GetMultiDexLocation(0, dex_location));
+  ASSERT_EQ("/system/app/framework.jar:classes2.dex",
+            DexFile::GetMultiDexLocation(1, dex_location));
+  ASSERT_EQ("/system/app/framework.jar:classes101.dex",
+            DexFile::GetMultiDexLocation(100, dex_location));
 }
 
 TEST_F(DexFileTest, GetDexCanonicalLocation) {
@@ -363,7 +372,7 @@
   std::string dex_location(dex_location_real.get());
 
   ASSERT_EQ(dex_location, DexFile::GetDexCanonicalLocation(dex_location.c_str()));
-  std::string multidex_location = DexFile::GetMultiDexClassesDexName(1, dex_location.c_str());
+  std::string multidex_location = DexFile::GetMultiDexLocation(1, dex_location.c_str());
   ASSERT_EQ(multidex_location, DexFile::GetDexCanonicalLocation(multidex_location.c_str()));
 
   std::string dex_location_sym = dex_location + "symlink";
@@ -371,7 +380,7 @@
 
   ASSERT_EQ(dex_location, DexFile::GetDexCanonicalLocation(dex_location_sym.c_str()));
 
-  std::string multidex_location_sym = DexFile::GetMultiDexClassesDexName(1, dex_location_sym.c_str());
+  std::string multidex_location_sym = DexFile::GetMultiDexLocation(1, dex_location_sym.c_str());
   ASSERT_EQ(multidex_location, DexFile::GetDexCanonicalLocation(multidex_location_sym.c_str()));
 
   ASSERT_EQ(0, unlink(dex_location_sym.c_str()));
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 85234dc..49c7fda 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1042,10 +1042,11 @@
 
 inline uint32_t RosAlloc::Run::GetBitmapLastVectorMask(size_t num_slots, size_t num_vec) {
   const size_t kBitsPerVec = 32;
-  DCHECK_GE(num_slots * kBitsPerVec, num_vec);
+  DCHECK_GE(num_vec * kBitsPerVec, num_slots);
+  DCHECK_NE(num_vec, 0U);
   size_t remain = num_vec * kBitsPerVec - num_slots;
-  DCHECK_NE(remain, kBitsPerVec);
-  return ((1U << remain) - 1) << (kBitsPerVec - remain);
+  DCHECK_LT(remain, kBitsPerVec);
+  return ((1U << remain) - 1) << ((kBitsPerVec - remain) & 0x1F);
 }
 
 inline bool RosAlloc::Run::IsAllFree() {
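Two things change in GetBitmapLastVectorMask: the first DCHECK had its operands swapped (the vectors must cover the slots, not the other way round), and the shift count is now masked with 0x1F. The mask matters when num_slots is an exact multiple of 32: remain is then 0 and the old code shifted a 32-bit value by 32, which is undefined behavior in C++. A standalone sketch of the fixed expression (the function name here is illustrative):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Mask covering the unused high bits of the last 32-bit bitmap vector.
    uint32_t LastVectorMask(size_t num_slots, size_t num_vec) {
      const size_t kBitsPerVec = 32;
      assert(num_vec != 0u && num_vec * kBitsPerVec >= num_slots);
      const size_t remain = num_vec * kBitsPerVec - num_slots;
      assert(remain < kBitsPerVec);  // remain is in [0, 31].
      // When remain == 0 there are no unused bits and the mask must be 0. Without
      // "& 0x1F" the shift count would be 32 (undefined for a 32-bit operand);
      // with it the count becomes 0 and 0 << 0 yields the intended 0.
      return ((1U << remain) - 1) << ((kBitsPerVec - remain) & 0x1F);
    }

    int main() {
      std::printf("%08x\n", LastVectorMask(64, 2));  // 00000000 (no unused bits)
      std::printf("%08x\n", LastVectorMask(60, 2));  // f0000000 (top 4 bits unused)
    }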
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index e2b9559..0ef58ea 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -175,10 +175,16 @@
   DCHECK(table_ != nullptr);
   DCHECK_GE(segment_state_.parts.numHoles, prevState.parts.numHoles);
 
-  if (GetIndirectRefKind(iref) == kHandleScopeOrInvalid &&
-      Thread::Current()->HandleScopeContains(reinterpret_cast<jobject>(iref))) {
-    LOG(WARNING) << "Attempt to remove local handle scope entry from IRT, ignoring";
-    return true;
+  if (GetIndirectRefKind(iref) == kHandleScopeOrInvalid) {
+    auto* self = Thread::Current();
+    if (self->HandleScopeContains(reinterpret_cast<jobject>(iref))) {
+      auto* env = self->GetJniEnv();
+      DCHECK(env != nullptr);
+      if (env->check_jni) {
+        LOG(WARNING) << "Attempt to remove local handle scope entry from IRT, ignoring";
+      }
+      return true;
+    }
   }
   const int idx = ExtractIndex(iref);
   if (idx < bottomIndex) {
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index ef3c6e2..ae67efb 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -501,6 +501,7 @@
   uint16_t num_regs;
   if (LIKELY(code_item != nullptr)) {
     num_regs = code_item->registers_size_;
+    DCHECK_EQ(string_init ? num_ins - 1 : num_ins, code_item->ins_size_);
   } else {
     DCHECK(called_method->IsNative() || called_method->IsProxyMethod());
     num_regs = num_ins;
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index 8f5a7d4..cd5d2f6 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -54,8 +54,9 @@
 // Sets string count and value in the allocation code path to ensure it is guarded by a CAS.
 class SetStringCountAndBytesVisitor {
  public:
-  SetStringCountAndBytesVisitor(int32_t count, uint8_t* src, int32_t high_byte)
-      : count_(count), src_(src), high_byte_(high_byte) {
+  SetStringCountAndBytesVisitor(int32_t count, Handle<ByteArray> src_array, int32_t offset,
+                                int32_t high_byte)
+      : count_(count), src_array_(src_array), offset_(offset), high_byte_(high_byte) {
   }
 
   void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
@@ -64,35 +65,63 @@
     String* string = down_cast<String*>(obj);
     string->SetCount(count_);
     uint16_t* value = string->GetValue();
+    const uint8_t* const src = reinterpret_cast<uint8_t*>(src_array_->GetData()) + offset_;
     for (int i = 0; i < count_; i++) {
-      value[i] = high_byte_ + (src_[i] & 0xFF);
+      value[i] = high_byte_ + (src[i] & 0xFF);
     }
   }
 
  private:
   const int32_t count_;
-  const uint8_t* const src_;
+  Handle<ByteArray> src_array_;
+  const int32_t offset_;
   const int32_t high_byte_;
 };
 
 // Sets string count and value in the allocation code path to ensure it is guarded by a CAS.
-class SetStringCountAndValueVisitor {
+class SetStringCountAndValueVisitorFromCharArray {
  public:
-  SetStringCountAndValueVisitor(int32_t count, uint16_t* src) : count_(count), src_(src) {
+  SetStringCountAndValueVisitorFromCharArray(int32_t count, Handle<CharArray> src_array,
+                                             int32_t offset) :
+    count_(count), src_array_(src_array), offset_(offset) {
   }
 
-  void operator()(Object* obj, size_t usable_size) const
+  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    UNUSED(usable_size);
     // Avoid AsString as object is not yet in live bitmap or allocation stack.
     String* string = down_cast<String*>(obj);
     string->SetCount(count_);
-    memcpy(string->GetValue(), src_, count_ * sizeof(uint16_t));
+    const uint16_t* const src = src_array_->GetData() + offset_;
+    memcpy(string->GetValue(), src, count_ * sizeof(uint16_t));
   }
 
  private:
   const int32_t count_;
-  const uint16_t* const src_;
+  Handle<CharArray> src_array_;
+  const int32_t offset_;
+};
+
+// Sets string count and value in the allocation code path to ensure it is guarded by a CAS.
+class SetStringCountAndValueVisitorFromString {
+ public:
+  SetStringCountAndValueVisitorFromString(int32_t count, Handle<String> src_string,
+                                          int32_t offset) :
+    count_(count), src_string_(src_string), offset_(offset) {
+  }
+
+  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Avoid AsString as object is not yet in live bitmap or allocation stack.
+    String* string = down_cast<String*>(obj);
+    string->SetCount(count_);
+    const uint16_t* const src = src_string_->GetValue() + offset_;
+    memcpy(string->GetValue(), src, count_ * sizeof(uint16_t));
+  }
+
+ private:
+  const int32_t count_;
+  Handle<String> src_string_;
+  const int32_t offset_;
 };
 
 inline String* String::Intern() {
@@ -140,8 +169,7 @@
 inline String* String::AllocFromByteArray(Thread* self, int32_t byte_length,
                                           Handle<ByteArray> array, int32_t offset,
                                           int32_t high_byte, gc::AllocatorType allocator_type) {
-  uint8_t* data = reinterpret_cast<uint8_t*>(array->GetData()) + offset;
-  SetStringCountAndBytesVisitor visitor(byte_length, data, high_byte << 8);
+  SetStringCountAndBytesVisitor visitor(byte_length, array, offset, high_byte << 8);
   String* string = Alloc<kIsInstrumented>(self, byte_length, allocator_type, visitor);
   return string;
 }
@@ -150,8 +178,7 @@
 inline String* String::AllocFromCharArray(Thread* self, int32_t array_length,
                                           Handle<CharArray> array, int32_t offset,
                                           gc::AllocatorType allocator_type) {
-  uint16_t* data = array->GetData() + offset;
-  SetStringCountAndValueVisitor visitor(array_length, data);
+  SetStringCountAndValueVisitorFromCharArray visitor(array_length, array, offset);
   String* new_string = Alloc<kIsInstrumented>(self, array_length, allocator_type, visitor);
   return new_string;
 }
@@ -159,8 +186,7 @@
 template <bool kIsInstrumented>
 inline String* String::AllocFromString(Thread* self, int32_t string_length, Handle<String> string,
                                        int32_t offset, gc::AllocatorType allocator_type) {
-  uint16_t* data = string->GetValue() + offset;
-  SetStringCountAndValueVisitor visitor(string_length, data);
+  SetStringCountAndValueVisitorFromString visitor(string_length, string, offset);
   String* new_string = Alloc<kIsInstrumented>(self, string_length, allocator_type, visitor);
   return new_string;
 }
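The visitors now hold a Handle plus an offset and compute the source pointer inside operator(), instead of receiving a raw data pointer up front. The likely motivation is that Alloc<> can trigger garbage collection, and with a moving collector a pointer computed before the allocation could be left dangling; a handle survives the move and GetData() then reads the array's current location. A toy sketch of the pattern (none of these types are the real ART classes):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    // Toy stand-ins, not the real ART types.
    struct CharArray {
      std::vector<uint16_t> data;
      uint16_t* GetData() { return data.data(); }
    };

    // A toy handle: one extra indirection through a slot a moving GC could update.
    template <typename T>
    struct Handle {
      T** slot;
      T* operator->() const { return *slot; }
    };

    class SetCountAndValueVisitor {
     public:
      SetCountAndValueVisitor(int32_t count, Handle<CharArray> src_array, int32_t offset)
          : count_(count), src_array_(src_array), offset_(offset) {}

      void operator()(uint16_t* dest) const {
        // Compute the source pointer here, at copy time, so it reflects where the
        // array lives after any allocation-triggered move. Capturing GetData() + offset
        // in the constructor, as the old visitors effectively did with raw pointers,
        // would leave a stale pointer if the array moved in between.
        const uint16_t* src = src_array_->GetData() + offset_;
        std::memcpy(dest, src, count_ * sizeof(uint16_t));
      }

     private:
      const int32_t count_;
      Handle<CharArray> src_array_;
      const int32_t offset_;
    };

    int main() {
      CharArray original{{'h', 'i', '!'}};
      CharArray* root = &original;
      SetCountAndValueVisitor visitor(2, Handle<CharArray>{&root}, 1);

      CharArray relocated = original;  // Simulate the GC moving the array...
      root = &relocated;               // ...and updating the handle's slot.

      uint16_t dest[2] = {0, 0};
      visitor(dest);
      std::printf("%c%c\n", static_cast<char>(dest[0]), static_cast<char>(dest[1]));  // i!
    }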
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index ca94644..782b9c0 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -115,10 +115,14 @@
       } else {
         for (int32_t i = 0; i < ste_array->GetLength(); ++i) {
           StackTraceElement* ste = ste_array->Get(i);
-          result += StringPrintf("  at %s (%s:%d)\n",
-                                 ste->GetMethodName()->ToModifiedUtf8().c_str(),
-                                 ste->GetFileName()->ToModifiedUtf8().c_str(),
-                                 ste->GetLineNumber());
+          DCHECK(ste != nullptr);
+          auto* method_name = ste->GetMethodName();
+          auto* file_name = ste->GetFileName();
+          result += StringPrintf(
+              "  at %s (%s:%d)\n",
+              method_name != nullptr ? method_name->ToModifiedUtf8().c_str() : "<unknown method>",
+              file_name != nullptr ? file_name->ToModifiedUtf8().c_str() : "(Unknown Source)",
+              ste->GetLineNumber());
         }
       }
     } else {
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 37e85ab..2f67263 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -230,8 +230,8 @@
   dex_files.push_back(std::move(dex_file));
 
   // Load secondary multidex files
-  for (int i = 1; ; i++) {
-    std::string secondary_dex_location = DexFile::GetMultiDexClassesDexName(i, dex_location);
+  for (size_t i = 1; ; i++) {
+    std::string secondary_dex_location = DexFile::GetMultiDexLocation(i, dex_location);
     oat_dex_file = oat_file.GetOatDexFile(secondary_dex_location.c_str(), nullptr, false);
     if (oat_dex_file == nullptr) {
       // There are no more secondary dex files to load.
@@ -403,9 +403,9 @@
   }
 
   // Verify the dex checksums for any secondary multidex files
-  for (int i = 1; ; i++) {
+  for (size_t i = 1; ; i++) {
     std::string secondary_dex_location
-      = DexFile::GetMultiDexClassesDexName(i, dex_location_);
+      = DexFile::GetMultiDexLocation(i, dex_location_);
     const OatFile::OatDexFile* secondary_oat_dex_file
       = file.GetOatDexFile(secondary_dex_location.c_str(), nullptr, false);
     if (secondary_oat_dex_file == nullptr) {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index eb60318..2633898 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1566,14 +1566,15 @@
   // Throwing an exception may cause its class initialization. If we mark the transaction
   // aborted before that, we may warn with a false alarm. Throwing the exception before
   // marking the transaction aborted avoids that.
-  preinitialization_transaction_->ThrowAbortError(self, false);
+  preinitialization_transaction_->ThrowAbortError(self, &abort_message);
   preinitialization_transaction_->Abort(abort_message);
 }
 
 void Runtime::ThrowTransactionAbortError(Thread* self) {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
-  preinitialization_transaction_->ThrowAbortError(self, true);
+  // Passing nullptr means we rethrow an exception with the earlier transaction abort message.
+  preinitialization_transaction_->ThrowAbortError(self, nullptr);
 }
 
 void Runtime::RecordWriteFieldBoolean(mirror::Object* obj, MemberOffset field_offset,
diff --git a/runtime/transaction.cc b/runtime/transaction.cc
index cc0f15f..ab821d7 100644
--- a/runtime/transaction.cc
+++ b/runtime/transaction.cc
@@ -70,13 +70,21 @@
   }
 }
 
-void Transaction::ThrowAbortError(Thread* self, bool rethrow) {
+void Transaction::ThrowAbortError(Thread* self, const std::string* abort_message) {
+  const bool rethrow = (abort_message == nullptr);
   if (kIsDebugBuild && rethrow) {
     CHECK(IsAborted()) << "Rethrow " << Transaction::kAbortExceptionDescriptor
                        << " while transaction is not aborted";
   }
-  std::string abort_msg(GetAbortMessage());
-  self->ThrowNewWrappedException(Transaction::kAbortExceptionSignature, abort_msg.c_str());
+  if (rethrow) {
+    // Rethrow an exception with the earlier abort message stored in the transaction.
+    self->ThrowNewWrappedException(Transaction::kAbortExceptionSignature,
+                                   GetAbortMessage().c_str());
+  } else {
+    // Throw an exception with the given abort message.
+    self->ThrowNewWrappedException(Transaction::kAbortExceptionSignature,
+                                   abort_message->c_str());
+  }
 }
 
 bool Transaction::IsAborted() {
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 4d85662..030478c 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -48,7 +48,7 @@
   void Abort(const std::string& abort_message)
       LOCKS_EXCLUDED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void ThrowAbortError(Thread* self, bool rethrow)
+  void ThrowAbortError(Thread* self, const std::string* abort_message)
       LOCKS_EXCLUDED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsAborted() LOCKS_EXCLUDED(log_lock_);
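The bool-to-pointer change makes the parameter carry the message itself: a non-null abort_message throws with that text, while nullptr means "rethrow using the message already recorded by the earlier Abort()", matching the two call sites in runtime.cc above. A toy model of that contract (purely illustrative; the real code sets a pending exception on the Thread rather than using C++ exceptions):

    #include <iostream>
    #include <stdexcept>
    #include <string>

    // Toy model only: not the real Transaction/Thread classes.
    class ToyTransaction {
     public:
      void Abort(const std::string& message) {
        aborted_ = true;
        abort_message_ = message;
      }

      void ThrowAbortError(const std::string* abort_message) {
        const bool rethrow = (abort_message == nullptr);
        if (rethrow && !aborted_) {
          std::cerr << "Rethrow requested while transaction is not aborted\n";  // ~ the CHECK
        }
        throw std::runtime_error(rethrow ? abort_message_ : *abort_message);
      }

     private:
      bool aborted_ = false;
      std::string abort_message_;
    };

    int main() {
      ToyTransaction transaction;
      std::string message = "illegal write during a transaction";
      try {
        // First failure: throw with an explicit message, then record it.
        transaction.ThrowAbortError(&message);
      } catch (const std::exception& e) {
        transaction.Abort(e.what());
      }
      try {
        // Later rethrow: nullptr picks up the stored message.
        transaction.ThrowAbortError(nullptr);
      } catch (const std::exception& e) {
        std::cout << e.what() << std::endl;  // "illegal write during a transaction"
      }
    }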
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index 8445751..2838681 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -137,7 +137,7 @@
     if (GetRegisterType(verifier, i).Equals(uninit_type)) {
       line_[i] = init_type.GetId();
       changed++;
-      if (i != this_reg && is_string) {
+      if (is_string && i != this_reg) {
         auto it = verifier->GetStringInitPcRegMap().find(dex_pc);
         if (it != verifier->GetStringInitPcRegMap().end()) {
           it->second.insert(i);
diff --git a/test/098-ddmc/src/Main.java b/test/098-ddmc/src/Main.java
index 962bd7f..f41ff2a 100644
--- a/test/098-ddmc/src/Main.java
+++ b/test/098-ddmc/src/Main.java
@@ -44,7 +44,7 @@
         System.out.println("Confirm when we overflow, we don't roll over to zero. b/17392248");
         final int overflowAllocations = 64 * 1024;  // Won't fit in unsigned 16-bit value.
         for (int i = 0; i < overflowAllocations; i++) {
-            new String("fnord");
+            new Object();
         }
         Allocations after = new Allocations(DdmVmInternal.getRecentAllocations());
         System.out.println("before < overflowAllocations=" + (before.numberOfEntries < overflowAllocations));
diff --git a/test/138-duplicate-classes-check/build b/test/138-duplicate-classes-check/build
deleted file mode 100755
index 7ddc81d..0000000
--- a/test/138-duplicate-classes-check/build
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Stop if something fails.
-set -e
-
-mkdir classes
-${JAVAC} -d classes `find src -name '*.java'`
-
-mkdir classes-ex
-${JAVAC} -d classes-ex `find src-ex -name '*.java'`
-
-if [ ${NEED_DEX} = "true" ]; then
-  ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
-  zip $TEST_NAME.jar classes.dex
-  ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
-  zip ${TEST_NAME}-ex.jar classes.dex
-fi
diff --git a/test/472-unreachable-if-regression/expected.txt b/test/472-unreachable-if-regression/expected.txt
new file mode 100644
index 0000000..9fc8bea
--- /dev/null
+++ b/test/472-unreachable-if-regression/expected.txt
@@ -0,0 +1,3 @@
+Test started.
+Successfully called UnreachableIf().
+Successfully called UnreachablePackedSwitch().
diff --git a/test/472-unreachable-if-regression/info.txt b/test/472-unreachable-if-regression/info.txt
new file mode 100644
index 0000000..d8b5a45
--- /dev/null
+++ b/test/472-unreachable-if-regression/info.txt
@@ -0,0 +1,3 @@
+Regression test for crashes during compilation of methods which end
+with an if-cc or switch, i.e. there's a fall-through out of method code.
+Also tests a packed-switch with negative offset to its data.
diff --git a/test/472-unreachable-if-regression/smali/Test.smali b/test/472-unreachable-if-regression/smali/Test.smali
new file mode 100644
index 0000000..c7107d1
--- /dev/null
+++ b/test/472-unreachable-if-regression/smali/Test.smali
@@ -0,0 +1,46 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTest;
+
+.super Ljava/lang/Object;
+
+.method public static UnreachableIf()V
+    .registers 1
+    return-void
+    : unreachable
+    not-int v0, v0
+    if-lt v0, v0, :unreachable
+    # fall-through out of code item
+.end method
+
+.method public static UnreachablePackedSwitch()V
+    .registers 1
+    return-void
+    : unreachable
+    goto :pswitch_2
+    :pswitch_data
+    .packed-switch 1
+        :pswitch_1
+        :pswitch_2
+        :pswitch_1
+        :pswitch_2
+    .end packed-switch
+    :pswitch_1
+    not-int v0, v0
+    :pswitch_2
+    packed-switch v0, :pswitch_data
+    # fall-through out of code item
+.end method
diff --git a/test/472-unreachable-if-regression/src/Main.java b/test/472-unreachable-if-regression/src/Main.java
new file mode 100644
index 0000000..c9f9511
--- /dev/null
+++ b/test/472-unreachable-if-regression/src/Main.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String args[]) throws Exception {
+    System.out.println("Test started.");
+    Class<?> c = Class.forName("Test");
+
+    Method unreachableIf = c.getMethod("UnreachableIf", (Class[]) null);
+    unreachableIf.invoke(null, (Object[]) null);
+    System.out.println("Successfully called UnreachableIf().");
+
+    Method unreachablePackedSwitch = c.getMethod("UnreachablePackedSwitch", (Class[]) null);
+    unreachablePackedSwitch.invoke(null, (Object[]) null);
+    System.out.println("Successfully called UnreachablePackedSwitch().");
+  }
+
+}
diff --git a/test/480-checker-dead-blocks/src/Main.java b/test/480-checker-dead-blocks/src/Main.java
index 560ce95..83dbb26 100644
--- a/test/480-checker-dead-blocks/src/Main.java
+++ b/test/480-checker-dead-blocks/src/Main.java
@@ -128,7 +128,7 @@
   // CHECK-DAG:     [[Arg:i\d+]]     ParameterValue
   // CHECK-DAG:                      Return [ [[Arg]] ]
 
-  // CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination_final (after)
+  // CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (after)
   // CHECK-NOT:                      If
   // CHECK-NOT:                      Add
 
@@ -139,9 +139,56 @@
     return x;
   }
 
+  // CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (before)
+  // CHECK-DAG:                      If
+  // CHECK-DAG:                      If
+  // CHECK-DAG:                      Add
+
+  // CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (after)
+  // CHECK-DAG:     [[Arg:i\d+]]     ParameterValue
+  // CHECK-DAG:                      Return [ [[Arg]] ]
+
+  // CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (after)
+  // CHECK-NOT:                      If
+  // CHECK-NOT:                      Add
+
+  public static int testUpdateLoopInformation(int x) {
+    // Use of Or in the condition generates a dead loop where not all of its
+    // blocks are removed. This forces DCE to update their loop information.
+    while (inlineFalse() || !inlineTrue()) {
+      x++;
+    }
+    return x;
+  }
+
+  // CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination_final (before)
+  // CHECK:                          SuspendCheck
+  // CHECK:                          SuspendCheck
+  // CHECK:                          SuspendCheck
+  // CHECK-NOT:                      SuspendCheck
+
+  // CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination_final (after)
+  // CHECK:                          SuspendCheck
+  // CHECK:                          SuspendCheck
+  // CHECK-NOT:                      SuspendCheck
+
+  public static int testRemoveSuspendCheck(int x, int y) {
+    // Inner loop will leave behind the header with its SuspendCheck. DCE must
+    // remove it, otherwise the outer loop would end up with two.
+    while (y > 0) {
+      while (inlineFalse() || !inlineTrue()) {
+        x++;
+      }
+      y--;
+    }
+    return x;
+  }
+
   public static void main(String[] args) {
     assertIntEquals(7, testTrueBranch(4, 3));
     assertIntEquals(1, testFalseBranch(4, 3));
     assertIntEquals(42, testRemoveLoop(42));
+    assertIntEquals(23, testUpdateLoopInformation(23));
+    assertIntEquals(12, testRemoveSuspendCheck(12, 5));
   }
 }
diff --git a/test/482-checker-loop-back-edge-use/expected.txt b/test/482-checker-loop-back-edge-use/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/482-checker-loop-back-edge-use/expected.txt
diff --git a/test/482-checker-loop-back-edge-use/info.txt b/test/482-checker-loop-back-edge-use/info.txt
new file mode 100644
index 0000000..f7fdeff
--- /dev/null
+++ b/test/482-checker-loop-back-edge-use/info.txt
@@ -0,0 +1,2 @@
+Tests the register allocator's optimization of adding synthesized uses
+at back edges.
diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java
new file mode 100644
index 0000000..74184e8
--- /dev/null
+++ b/test/482-checker-loop-back-edge-use/src/Main.java
@@ -0,0 +1,131 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+
+public class Main {
+
+  // CHECK-START: void Main.loop1(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 22) }, uses: { 17 22 }
+  // CHECK:         Goto (liveness: 20)
+  public static void loop1(boolean incoming) {
+    while (incoming) {}
+  }
+
+  // CHECK-START: void Main.loop2(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 42) }, uses: { 33 38 42 }
+  // CHECK:         Goto (liveness: 36)
+  // CHECK:         Goto (liveness: 40)
+  public static void loop2(boolean incoming) {
+    while (true) {
+      System.out.println("foo");
+      while (incoming) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop3(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 60) }, uses: { 56 60 }
+  // CHECK:         Goto (liveness: 58)
+
+  // CHECK-START: void Main.loop3(boolean) liveness (after)
+  // CHECK-NOT:     Goto (liveness: 54)
+  public static void loop3(boolean incoming) {
+    // 'incoming' only needs a use at the outer loop's back edge.
+    while (System.currentTimeMillis() != 42) {
+      while (Runtime.getRuntime() != null) {}
+      System.out.println(incoming);
+    }
+  }
+
+  // CHECK-START: void Main.loop4(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 22) }, uses: { 22 }
+
+  // CHECK-START: void Main.loop4(boolean) liveness (after)
+  // CHECK-NOT:     Goto (liveness: 20)
+  public static void loop4(boolean incoming) {
+    // 'incoming' has no loop use, so should not have back edge uses.
+    System.out.println(incoming);
+    while (System.currentTimeMillis() != 42) {
+      while (Runtime.getRuntime() != null) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop5(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 50) }, uses: { 33 42 46 50 }
+  // CHECK:         Goto (liveness: 44)
+  // CHECK:         Goto (liveness: 48)
+  public static void loop5(boolean incoming) {
+    // 'incoming' must have a use at both back edges.
+    while (Runtime.getRuntime() != null) {
+      while (incoming) {
+        System.out.println(incoming);
+      }
+    }
+  }
+
+  // CHECK-START: void Main.loop6(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 46) }, uses: { 24 46 }
+  // CHECK:         Goto (liveness: 44)
+
+  // CHECK-START: void Main.loop6(boolean) liveness (after)
+  // CHECK-NOT:     Goto (liveness: 22)
+  public static void loop6(boolean incoming) {
+    // 'incoming' must have a use only at the first loop's back edge.
+    while (true) {
+      System.out.println(incoming);
+      while (Runtime.getRuntime() != null) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop7(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 50) }, uses: { 32 41 46 50 }
+  // CHECK:         Goto (liveness: 44)
+  // CHECK:         Goto (liveness: 48)
+  public static void loop7(boolean incoming) {
+    // 'incoming' must have a use at both back edges.
+    while (Runtime.getRuntime() != null) {
+      System.out.println(incoming);
+      while (incoming) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop8() liveness (after)
+  // CHECK:         StaticFieldGet (liveness: 12 ranges: { [12, 44) }, uses: { 35 40 44 }
+  // CHECK:         Goto (liveness: 38)
+  // CHECK:         Goto (liveness: 42)
+  public static void loop8() {
+    // 'incoming' must have a use at both back edges.
+    boolean incoming = field;
+    while (Runtime.getRuntime() != null) {
+      while (incoming) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop9() liveness (after)
+  // CHECK:         StaticFieldGet (liveness: 22 ranges: { [22, 36) }, uses: { 31 36 }
+  // CHECK:         Goto (liveness: 38)
+  public static void loop9() {
+    while (Runtime.getRuntime() != null) {
+      // 'incoming' must only have a use in the inner loop.
+      boolean incoming = field;
+      while (incoming) {}
+    }
+  }
+
+  public static void main(String[] args) {
+  }
+
+  static boolean field;
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 93340fb..c7e6877 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -385,8 +385,7 @@
 
 # Known broken tests for the optimizing compiler.
 TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS :=
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS += 099-vmdebug # b/18098594
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS += 802-deoptimization # b/18547544
+TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS += 472-unreachable-if-regression # b/19988134
 
 ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \