Diffstat (limited to 'compiler/optimizing')
30 files changed, 692 insertions, 1307 deletions
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 26bf1cbc75..1d604e7135 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -56,7 +56,6 @@ class HGraphBuilder : public ValueObject {
         return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])),
         code_start_(nullptr),
         latest_result_(nullptr),
-        can_use_baseline_for_string_init_(true),
         compilation_stats_(compiler_stats),
         interpreter_metadata_(interpreter_metadata),
         dex_cache_(dex_cache) {}
@@ -77,7 +76,6 @@ class HGraphBuilder : public ValueObject {
         return_type_(return_type),
         code_start_(nullptr),
         latest_result_(nullptr),
-        can_use_baseline_for_string_init_(true),
         compilation_stats_(nullptr),
         interpreter_metadata_(nullptr),
         null_dex_cache_(),
@@ -85,10 +83,6 @@ bool BuildGraph(const DexFile::CodeItem& code);
 
-  bool CanUseBaselineForStringInit() const {
-    return can_use_baseline_for_string_init_;
-  }
-
   static constexpr const char* kBuilderPassName = "builder";
 
   // The number of entries in a packed switch before we use a jump table or specified
@@ -363,11 +357,6 @@ class HGraphBuilder : public ValueObject {
   // used by move-result instructions.
   HInstruction* latest_result_;
 
-  // We need to know whether we have built a graph that has calls to StringFactory
-  // and hasn't gone through the verifier. If the following flag is `false`, then
-  // we cannot compile with baseline.
-  bool can_use_baseline_for_string_init_;
-
   OptimizingCompilerStats* compilation_stats_;
 
   const uint8_t* interpreter_metadata_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index ea0b9eca9a..a3bbfdbd27 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -142,23 +142,6 @@ size_t CodeGenerator::GetCachePointerOffset(uint32_t index) {
   return pointer_size * index;
 }
 
-void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
-  Initialize();
-  if (!is_leaf) {
-    MarkNotLeaf();
-  }
-  const bool is_64_bit = Is64BitInstructionSet(GetInstructionSet());
-  InitializeCodeGeneration(GetGraph()->GetNumberOfLocalVRegs()
-                               + GetGraph()->GetTemporariesVRegSlots()
-                               + 1 /* filler */,
-                           0, /* the baseline compiler does not have live registers at slow path */
-                           0, /* the baseline compiler does not have live registers at slow path */
-                           GetGraph()->GetMaximumNumberOfOutVRegs()
-                               + (is_64_bit ? 2 : 1) /* current method */,
-                           GetGraph()->GetBlocks());
-  CompileInternal(allocator, /* is_baseline */ true);
-}
-
 bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
   DCHECK_EQ((*block_order_)[current_block_index_], current);
   return GetNextBlockToEmit() == FirstNonEmptyBlock(next);
@@ -220,8 +203,12 @@ void CodeGenerator::GenerateSlowPaths() {
   current_slow_path_ = nullptr;
 }
 
-void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
-  is_baseline_ = is_baseline;
+void CodeGenerator::Compile(CodeAllocator* allocator) {
+  // The register allocator already called `InitializeCodeGeneration`,
+  // where the frame size has been computed.
+  DCHECK(block_order_ != nullptr);
+  Initialize();
+
   HGraphVisitor* instruction_visitor = GetInstructionVisitor();
   DCHECK_EQ(current_block_index_, 0u);
@@ -242,9 +229,6 @@
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
       DisassemblyScope disassembly_scope(current, *this);
-      if (is_baseline) {
-        InitLocationsBaseline(current);
-      }
       DCHECK(CheckTypeConsistency(current));
       current->Accept(instruction_visitor);
     }
@@ -254,7 +238,7 @@
 
   // Emit catch stack maps at the end of the stack map stream as expected by the
   // runtime exception handler.
-  if (!is_baseline && graph_->HasTryCatch()) {
+  if (graph_->HasTryCatch()) {
     RecordCatchBlockInfo();
   }
@@ -262,14 +246,6 @@
   Finalize(allocator);
 }
 
-void CodeGenerator::CompileOptimized(CodeAllocator* allocator) {
-  // The register allocator already called `InitializeCodeGeneration`,
-  // where the frame size has been computed.
-  DCHECK(block_order_ != nullptr);
-  Initialize();
-  CompileInternal(allocator, /* is_baseline */ false);
-}
-
 void CodeGenerator::Finalize(CodeAllocator* allocator) {
   size_t code_size = GetAssembler()->CodeSize();
   uint8_t* buffer = allocator->Allocate(code_size);
@@ -282,29 +258,6 @@ void CodeGenerator::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches A
   // No linker patches by default.
 }
 
-size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) {
-  for (size_t i = 0; i < length; ++i) {
-    if (!array[i]) {
-      array[i] = true;
-      return i;
-    }
-  }
-  LOG(FATAL) << "Could not find a register in baseline register allocator";
-  UNREACHABLE();
-}
-
-size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length) {
-  for (size_t i = 0; i < length - 1; i += 2) {
-    if (!array[i] && !array[i + 1]) {
-      array[i] = true;
-      array[i + 1] = true;
-      return i;
-    }
-  }
-  LOG(FATAL) << "Could not find a register in baseline register allocator";
-  UNREACHABLE();
-}
-
 void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
                                              size_t maximum_number_of_live_core_registers,
                                              size_t maximum_number_of_live_fpu_registers,
@@ -592,123 +545,6 @@ void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const {
   }
 }
 
-void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
-  LocationSummary* locations = instruction->GetLocations();
-  if (locations == nullptr) return;
-
-  for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
-    blocked_core_registers_[i] = false;
-  }
-
-  for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
-    blocked_fpu_registers_[i] = false;
-  }
-
-  for (size_t i = 0, e = number_of_register_pairs_; i < e; ++i) {
-    blocked_register_pairs_[i] = false;
-  }
-
-  // Mark all fixed input, temp and output registers as used.
-  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
-    BlockIfInRegister(locations->InAt(i));
-  }
-
-  for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
-    Location loc = locations->GetTemp(i);
-    BlockIfInRegister(loc);
-  }
-  Location result_location = locations->Out();
-  if (locations->OutputCanOverlapWithInputs()) {
-    BlockIfInRegister(result_location, /* is_out */ true);
-  }
-
-  SetupBlockedRegisters(/* is_baseline */ true);
-
-  // Allocate all unallocated input locations.
-  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
-    Location loc = locations->InAt(i);
-    HInstruction* input = instruction->InputAt(i);
-    if (loc.IsUnallocated()) {
-      if ((loc.GetPolicy() == Location::kRequiresRegister)
-          || (loc.GetPolicy() == Location::kRequiresFpuRegister)) {
-        loc = AllocateFreeRegister(input->GetType());
-      } else {
-        DCHECK_EQ(loc.GetPolicy(), Location::kAny);
-        HLoadLocal* load = input->AsLoadLocal();
-        if (load != nullptr) {
-          loc = GetStackLocation(load);
-        } else {
-          loc = AllocateFreeRegister(input->GetType());
-        }
-      }
-      locations->SetInAt(i, loc);
-    }
-  }
-
-  // Allocate all unallocated temp locations.
-  for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
-    Location loc = locations->GetTemp(i);
-    if (loc.IsUnallocated()) {
-      switch (loc.GetPolicy()) {
-        case Location::kRequiresRegister:
-          // Allocate a core register (large enough to fit a 32-bit integer).
-          loc = AllocateFreeRegister(Primitive::kPrimInt);
-          break;
-
-        case Location::kRequiresFpuRegister:
-          // Allocate a core register (large enough to fit a 64-bit double).
-          loc = AllocateFreeRegister(Primitive::kPrimDouble);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected policy for temporary location "
-                     << loc.GetPolicy();
-      }
-      locations->SetTempAt(i, loc);
-    }
-  }
-  if (result_location.IsUnallocated()) {
-    switch (result_location.GetPolicy()) {
-      case Location::kAny:
-      case Location::kRequiresRegister:
-      case Location::kRequiresFpuRegister:
-        result_location = AllocateFreeRegister(instruction->GetType());
-        break;
-      case Location::kSameAsFirstInput:
-        result_location = locations->InAt(0);
-        break;
-    }
-    locations->UpdateOut(result_location);
-  }
-}
-
-void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) {
-  AllocateLocations(instruction);
-  if (instruction->GetLocations() == nullptr) {
-    if (instruction->IsTemporary()) {
-      HInstruction* previous = instruction->GetPrevious();
-      Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-      Move(previous, temp_location, instruction);
-    }
-    return;
-  }
-  AllocateRegistersLocally(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    Location location = instruction->GetLocations()->InAt(i);
-    HInstruction* input = instruction->InputAt(i);
-    if (location.IsValid()) {
-      // Move the input to the desired location.
-      if (input->GetNext()->IsTemporary()) {
-        // If the input was stored in a temporary, use that temporary to
-        // perform the move.
-        Move(input->GetNext(), location, instruction);
-      } else {
-        Move(input, location, instruction);
-      }
-    }
-  }
-}
-
 void CodeGenerator::AllocateLocations(HInstruction* instruction) {
   instruction->Accept(GetLocationBuilder());
   DCHECK(CheckTypeConsistency(instruction));
@@ -789,132 +625,6 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph,
   }
 }
 
-void CodeGenerator::BuildNativeGCMap(
-    ArenaVector<uint8_t>* data, const CompilerDriver& compiler_driver) const {
-  const std::vector<uint8_t>& gc_map_raw =
-      compiler_driver.GetVerifiedMethod(&GetGraph()->GetDexFile(), GetGraph()->GetMethodIdx())
-          ->GetDexGcMap();
-  verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
-
-  uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset();
-
-  size_t num_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
-  GcMapBuilder builder(data, num_stack_maps, max_native_offset, dex_gc_map.RegWidth());
-  for (size_t i = 0; i != num_stack_maps; ++i) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    uint32_t native_offset = stack_map_entry.native_pc_offset;
-    uint32_t dex_pc = stack_map_entry.dex_pc;
-    const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
-    CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
-    builder.AddEntry(native_offset, references);
-  }
-}
-
-void CodeGenerator::BuildMappingTable(ArenaVector<uint8_t>* data) const {
-  uint32_t pc2dex_data_size = 0u;
-  uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps();
-  uint32_t pc2dex_offset = 0u;
-  int32_t pc2dex_dalvik_offset = 0;
-  uint32_t dex2pc_data_size = 0u;
-  uint32_t dex2pc_entries = 0u;
-  uint32_t dex2pc_offset = 0u;
-  int32_t dex2pc_dalvik_offset = 0;
-
-  for (size_t i = 0; i < pc2dex_entries; i++) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    pc2dex_data_size += UnsignedLeb128Size(stack_map_entry.native_pc_offset - pc2dex_offset);
-    pc2dex_data_size += SignedLeb128Size(stack_map_entry.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = stack_map_entry.native_pc_offset;
-    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
-  }
-
-  // Walk over the blocks and find which ones correspond to catch block entries.
-  for (HBasicBlock* block : graph_->GetBlocks()) {
-    if (block->IsCatchBlock()) {
-      intptr_t native_pc = GetAddressOf(block);
-      ++dex2pc_entries;
-      dex2pc_data_size += UnsignedLeb128Size(native_pc - dex2pc_offset);
-      dex2pc_data_size += SignedLeb128Size(block->GetDexPc() - dex2pc_dalvik_offset);
-      dex2pc_offset = native_pc;
-      dex2pc_dalvik_offset = block->GetDexPc();
-    }
-  }
-
-  uint32_t total_entries = pc2dex_entries + dex2pc_entries;
-  uint32_t hdr_data_size = UnsignedLeb128Size(total_entries) + UnsignedLeb128Size(pc2dex_entries);
-  uint32_t data_size = hdr_data_size + pc2dex_data_size + dex2pc_data_size;
-  data->resize(data_size);
-
-  uint8_t* data_ptr = &(*data)[0];
-  uint8_t* write_pos = data_ptr;
-
-  write_pos = EncodeUnsignedLeb128(write_pos, total_entries);
-  write_pos = EncodeUnsignedLeb128(write_pos, pc2dex_entries);
-  DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size);
-  uint8_t* write_pos2 = write_pos + pc2dex_data_size;
-
-  pc2dex_offset = 0u;
-  pc2dex_dalvik_offset = 0u;
-  dex2pc_offset = 0u;
-  dex2pc_dalvik_offset = 0u;
-
-  for (size_t i = 0; i < pc2dex_entries; i++) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    DCHECK(pc2dex_offset <= stack_map_entry.native_pc_offset);
-    write_pos = EncodeUnsignedLeb128(write_pos, stack_map_entry.native_pc_offset - pc2dex_offset);
-    write_pos = EncodeSignedLeb128(write_pos, stack_map_entry.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = stack_map_entry.native_pc_offset;
-    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
-  }
-
-  for (HBasicBlock* block : graph_->GetBlocks()) {
-    if (block->IsCatchBlock()) {
-      intptr_t native_pc = GetAddressOf(block);
-      write_pos2 = EncodeUnsignedLeb128(write_pos2, native_pc - dex2pc_offset);
-      write_pos2 = EncodeSignedLeb128(write_pos2, block->GetDexPc() - dex2pc_dalvik_offset);
-      dex2pc_offset = native_pc;
-      dex2pc_dalvik_offset = block->GetDexPc();
-    }
-  }
-
-
-  DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size + pc2dex_data_size);
-  DCHECK_EQ(static_cast<size_t>(write_pos2 - data_ptr), data_size);
-
-  if (kIsDebugBuild) {
-    // Verify the encoded table holds the expected data.
-    MappingTable table(data_ptr);
-    CHECK_EQ(table.TotalSize(), total_entries);
-    CHECK_EQ(table.PcToDexSize(), pc2dex_entries);
-    auto it = table.PcToDexBegin();
-    auto it2 = table.DexToPcBegin();
-    for (size_t i = 0; i < pc2dex_entries; i++) {
-      const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-      CHECK_EQ(stack_map_entry.native_pc_offset, it.NativePcOffset());
-      CHECK_EQ(stack_map_entry.dex_pc, it.DexPc());
-      ++it;
-    }
-    for (HBasicBlock* block : graph_->GetBlocks()) {
-      if (block->IsCatchBlock()) {
-        CHECK_EQ(GetAddressOf(block), it2.NativePcOffset());
-        CHECK_EQ(block->GetDexPc(), it2.DexPc());
-        ++it2;
-      }
-    }
-    CHECK(it == table.PcToDexEnd());
-    CHECK(it2 == table.DexToPcEnd());
-  }
-}
-
-void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const {
-  Leb128Encoder<ArenaVector<uint8_t>> vmap_encoder(data);
-  // We currently don't use callee-saved registers.
-  size_t size = 0 + 1 /* marker */ + 0;
-  vmap_encoder.Reserve(size + 1u);  // All values are likely to be one byte in ULEB128 (<128).
-  vmap_encoder.PushBackUnsigned(size);
-  vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
-}
-
 size_t CodeGenerator::ComputeStackMapsSize() {
   return stack_map_stream_.PrepareForFillIn();
 }
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 5958cd89bc..4f8f146753 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -158,10 +158,8 @@ class FieldAccessCallingConvention {
 
 class CodeGenerator {
  public:
-  // Compiles the graph to executable instructions. Returns whether the compilation
-  // succeeded.
-  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
-  void CompileOptimized(CodeAllocator* allocator);
+  // Compiles the graph to executable instructions.
+  void Compile(CodeAllocator* allocator);
   static CodeGenerator* Create(HGraph* graph,
                                InstructionSet instruction_set,
                                const InstructionSetFeatures& isa_features,
@@ -214,7 +212,7 @@ class CodeGenerator {
   size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
   size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
 
-  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;
+  virtual void SetupBlockedRegisters() const = 0;
 
   virtual void ComputeSpillMask() {
     core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
@@ -290,17 +288,9 @@ class CodeGenerator {
     slow_paths_.push_back(slow_path);
   }
 
-  void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
-  void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
-  void BuildNativeGCMap(
-      ArenaVector<uint8_t>* vector, const CompilerDriver& compiler_driver) const;
   void BuildStackMaps(MemoryRegion region);
 
   size_t ComputeStackMapsSize();
 
-  bool IsBaseline() const {
-    return is_baseline_;
-  }
-
   bool IsLeafMethod() const {
     return is_leaf_;
   }
@@ -489,7 +479,6 @@ class CodeGenerator {
         fpu_callee_save_mask_(fpu_callee_save_mask),
         stack_map_stream_(graph->GetArena()),
         block_order_(nullptr),
-        is_baseline_(false),
         disasm_info_(nullptr),
         stats_(stats),
         graph_(graph),
@@ -502,15 +491,6 @@ class CodeGenerator {
     slow_paths_.reserve(8);
   }
 
-  // Register allocation logic.
-  void AllocateRegistersLocally(HInstruction* instruction) const;
-
-  // Backend specific implementation for allocating a register.
-  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;
-
-  static size_t FindFreeEntry(bool* array, size_t length);
-  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);
-
   virtual Location GetStackLocation(HLoadLocal* load) const = 0;
 
   virtual HGraphVisitor* GetLocationBuilder() = 0;
@@ -593,16 +573,11 @@ class CodeGenerator {
   // The order to use for code generation.
   const ArenaVector<HBasicBlock*>* block_order_;
 
-  // Whether we are using baseline.
-  bool is_baseline_;
-
   DisassemblyInformation* disasm_info_;
 
  private:
-  void InitLocationsBaseline(HInstruction* instruction);
   size_t GetStackOffsetOfSavedRegister(size_t index);
   void GenerateSlowPaths();
-  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
   void BlockIfInRegister(Location location, bool is_out = false) const;
   void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index a11ceb9bd9..272579219f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -47,9 +47,7 @@ static bool ExpectedPairLayout(Location location) {
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = R0;
 
-// We unconditionally allocate R5 to ensure we can do long operations
-// with baseline.
-static constexpr Register kCoreSavedRegisterForBaseline = R5;
+static constexpr Register kCoreAlwaysSpillRegister = R5;
 static constexpr Register kCoreCalleeSaves[] =
     { R5, R6, R7, R8, R10, R11, LR };
 static constexpr SRegister kFpuCalleeSaves[] =
@@ -728,6 +726,24 @@ inline Condition ARMUnsignedCondition(IfCondition cond) {
   UNREACHABLE();
 }
 
+inline Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
+  // The ARM condition codes can express all the necessary branches, see the
+  // "Meaning (floating-point)" column in the table A8-1 of the ARMv7 reference manual.
+  // There is no dex instruction or HIR that would need the missing conditions
+  // "equal or unordered" or "not equal".
+  switch (cond) {
+    case kCondEQ: return EQ;
+    case kCondNE: return NE /* unordered */;
+    case kCondLT: return gt_bias ? CC : LT /* unordered */;
+    case kCondLE: return gt_bias ? LS : LE /* unordered */;
+    case kCondGT: return gt_bias ? HI /* unordered */ : GT;
+    case kCondGE: return gt_bias ? CS /* unordered */ : GE;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
 void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << Register(reg);
 }
@@ -815,58 +831,7 @@ void CodeGeneratorARM::Finalize(CodeAllocator* allocator) {
   CodeGenerator::Finalize(allocator);
 }
 
-Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      ArmManagedRegister pair =
-          ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        ArmManagedRegister current =
-            ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat: {
-      int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfSRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimDouble: {
-      int reg = FindTwoFreeConsecutiveAlignedEntries(blocked_fpu_registers_, kNumberOfSRegisters);
-      DCHECK_EQ(reg % 2, 0);
-      return Location::FpuRegisterPairLocation(reg, reg + 1);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location::NoLocation();
-}
-
-void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[R1_R2] = true;
 
@@ -881,15 +846,7 @@ void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
   // Reserve temp register.
   blocked_core_registers_[IP] = true;
 
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-
-    blocked_core_registers_[kCoreSavedRegisterForBaseline] = false;
-  }
-
-  if (is_baseline || GetGraph()->IsDebuggable()) {
+  if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
     // now, just block them.
@@ -919,11 +876,10 @@ InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGene
 
 void CodeGeneratorARM::ComputeSpillMask() {
   core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
-  // Save one extra register for baseline. Note that on thumb2, there is no easy
-  // instruction to restore just the PC, so this actually helps both baseline
-  // and non-baseline to save and restore at least two registers at entry and exit.
-  core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline);
   DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+  // There is no easy instruction to restore just the PC on thumb2. We spill and
+  // restore another arbitrary register.
+  core_spill_mask_ |= (1 << kCoreAlwaysSpillRegister);
   fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
   // We use vpush and vpop for saving and restoring floating point registers, which take
   // a SRegister and the number of registers to save/restore after that SRegister. We
@@ -1416,15 +1372,9 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 
 void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
                                                   Label* true_label,
-                                                  Label* false_label) {
+                                                  Label* false_label ATTRIBUTE_UNUSED) {
   __ vmstat();  // transfer FP status register to ARM APSR.
-  // TODO: merge into a single branch (except "equal or unordered" and "not equal")
-  if (cond->IsFPConditionTrueIfNaN()) {
-    __ b(true_label, VS);  // VS for unordered.
-  } else if (cond->IsFPConditionFalseIfNaN()) {
-    __ b(false_label, VS);  // VS for unordered.
-  }
-  __ b(true_label, ARMCondition(cond->GetCondition()));
+  __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias()));
 }
 
 void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
@@ -1972,9 +1922,9 @@ void InstructionCodeGeneratorARM::VisitInvokeUnresolved(HInvokeUnresolved* invok
 }
 
 void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
                                          codegen_->GetAssembler(),
@@ -2004,9 +1954,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen)
 }
 
 void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -3803,6 +3753,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
 
   Label less, greater, done;
   Primitive::Type type = compare->InputAt(0)->GetType();
+  Condition less_cond;
   switch (type) {
     case Primitive::kPrimLong: {
       __ cmp(left.AsRegisterPairHigh<Register>(),
@@ -3813,6 +3764,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
       __ LoadImmediate(out, 0);
       __ cmp(left.AsRegisterPairLow<Register>(),
              ShifterOperand(right.AsRegisterPairLow<Register>()));  // Unsigned compare.
+      less_cond = LO;
       break;
     }
     case Primitive::kPrimFloat:
@@ -3825,14 +3777,15 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
                  FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
       }
       __ vmstat();  // transfer FP status register to ARM APSR.
-      __ b(compare->IsGtBias() ? &greater : &less, VS);  // VS for unordered.
+      less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
       break;
     }
     default:
       LOG(FATAL) << "Unexpected compare type " << type;
+      UNREACHABLE();
   }
   __ b(&done, EQ);
-  __ b(&less, LO);  // LO is for both: unsigned compare for longs and 'less than' for floats.
+  __ b(&less, less_cond);
 
   __ Bind(&greater);
   __ LoadImmediate(out, 1);
@@ -5530,7 +5483,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
      // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -5740,8 +5693,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6027,6 +5980,7 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
           new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
       codegen_->AddSlowPath(slow_path);
 
+      // IP = Thread::Current()->GetIsGcMarking()
       __ LoadFromOffset(
           kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value());
       __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
@@ -6105,11 +6059,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
   //   }
   //
   // Note: the original implementation in ReadBarrier::Barrier is
-  // slightly more complex as:
-  // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
-  // - it performs additional checks that we do not do here for
-  //   performance reasons.
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
 
   Register ref_reg = ref.AsRegister<Register>();
   Register temp_reg = temp.AsRegister<Register>();
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 26d6d63b31..d45ea973f9 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -340,9 +340,7 @@ class CodeGeneratorARM : public CodeGenerator {
     return GetLabelOf(block)->Position();
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
@@ -444,7 +442,7 @@ class CodeGeneratorARM : public CodeGenerator {
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t offset,
                                              Location temp,
@@ -452,7 +450,7 @@ class CodeGeneratorARM : public CodeGenerator {
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t data_offset,
                                              Location index,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 5e905fc9aa..2cb2741b17 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -93,6 +93,24 @@ inline Condition ARM64Condition(IfCondition cond) {
   UNREACHABLE();
 }
 
+inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
+  // The ARM64 condition codes can express all the necessary branches, see the
+  // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
+  // There is no dex instruction or HIR that would need the missing conditions
+  // "equal or unordered" or "not equal".
+  switch (cond) {
+    case kCondEQ: return eq;
+    case kCondNE: return ne /* unordered */;
+    case kCondLT: return gt_bias ? cc : lt /* unordered */;
+    case kCondLE: return gt_bias ? ls : le /* unordered */;
+    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
+    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
 Location ARM64ReturnLocation(Primitive::Type return_type) {
   // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
   // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
@@ -604,30 +622,13 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
             instruction_->GetLocations()->Intrinsified()));
+    // The read barrier instrumentation does not support the
+    // HArm64IntermediateAddress instruction yet.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()));
 
     __ Bind(GetEntryLabel());
 
-    // Note: In the case of a HArrayGet instruction, when the base
-    // address is a HArm64IntermediateAddress instruction, it does not
-    // point to the array object itself, but to an offset within this
-    // object. However, the read barrier entry point needs the array
-    // object address to be passed as first argument. So we
-    // temporarily set back `obj_` to that address, and restore its
-    // initial value later.
-    if (instruction_->IsArrayGet() &&
-        instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
-      if (kIsDebugBuild) {
-        HArm64IntermediateAddress* intermediate_address =
-            instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
-        uint32_t intermediate_address_offset =
-            intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
-        DCHECK_EQ(intermediate_address_offset, offset_);
-        DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
-      }
-      Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
-      __ Sub(obj_reg, obj_reg, offset_);
-    }
-
     SaveLiveRegisters(codegen, locations);
 
     // We may have to change the index's value, but as `index_` is a
@@ -728,22 +729,6 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
 
     RestoreLiveRegisters(codegen, locations);
 
-    // Restore the value of `obj_` when it corresponds to a
-    // HArm64IntermediateAddress instruction.
-    if (instruction_->IsArrayGet() &&
-        instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
-      if (kIsDebugBuild) {
-        HArm64IntermediateAddress* intermediate_address =
-            instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
-        uint32_t intermediate_address_offset =
-            intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
-        DCHECK_EQ(intermediate_address_offset, offset_);
-        DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
-      }
-      Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
-      __ Add(obj_reg, obj_reg, offset_);
-    }
-
     __ B(GetExitLabel());
   }
 
@@ -1127,7 +1112,7 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_
   }
 }
 
-void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM64::SetupBlockedRegisters() const {
   // Blocked core registers:
   //   lr : Runtime reserved.
   //   tr : Runtime reserved.
@@ -1148,40 +1133,17 @@ void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
   }
 
-  if (is_baseline) {
-    CPURegList reserved_core_baseline_registers = callee_saved_core_registers;
-    while (!reserved_core_baseline_registers.IsEmpty()) {
-      blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true;
-    }
-  }
-
-  if (is_baseline || GetGraph()->IsDebuggable()) {
+  if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
     // now, just block them.
-    CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers;
-    while (!reserved_fp_baseline_registers.IsEmpty()) {
-      blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true;
+    CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
+    while (!reserved_fp_registers_debuggable.IsEmpty()) {
+      blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().code()] = true;
     }
   }
 }
 
-Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const {
-  if (type == Primitive::kPrimVoid) {
-    LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  if (Primitive::IsFloatingPointType(type)) {
-    ssize_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfAllocatableFPRegisters);
-    DCHECK_NE(reg, -1);
-    return Location::FpuRegisterLocation(reg);
-  } else {
-    ssize_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfAllocatableRegisters);
-    DCHECK_NE(reg, -1);
-    return Location::RegisterLocation(reg);
-  }
-}
-
 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
   __ Str(reg, MemOperand(sp, stack_index));
@@ -1970,6 +1932,9 @@ void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp(
 }
 
 void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the
+  // HArm64IntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
@@ -1979,6 +1944,9 @@ void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddr
 
 void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress(
     HArm64IntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the
+  // HArm64IntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
   __ Add(OutputRegister(instruction),
          InputRegisterAt(instruction, 0),
         Operand(InputOperandAt(instruction, 1)));
@@ -2067,6 +2035,9 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
   } else {
     Register temp = temps.AcquireSameSizeAs(obj);
     if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+      // The read barrier instrumentation does not support the
+      // HArm64IntermediateAddress instruction yet.
+      DCHECK(!kEmitCompilerReadBarrier);
       // We do not need to compute the intermediate address from the array: the
       // input instruction has done it already. See the comment in
       // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
@@ -2093,11 +2064,6 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
     if (index.IsConstant()) {
       codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
     } else {
-      // Note: when `obj_loc` is a HArm64IntermediateAddress, it does
-      // not contain the base address of the array object, which is
-      // needed by the read barrier entry point. So the read barrier
-      // slow path will temporarily set back `obj_loc` to the right
-      // address (see ReadBarrierForHeapReferenceSlowPathARM64::EmitNativeCode).
       codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index);
     }
   }
@@ -2161,6 +2127,9 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
       if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+        // The read barrier instrumentation does not support the
+        // HArm64IntermediateAddress instruction yet.
+        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
         // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
@@ -2407,12 +2376,8 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
       } else {
         __ Fcmp(left, InputFPRegisterAt(compare, 1));
       }
-      if (compare->IsGtBias()) {
-        __ Cset(result, ne);
-      } else {
-        __ Csetm(result, ne);
-      }
-      __ Cneg(result, result, compare->IsGtBias() ? mi : gt);
+      __ Cset(result, ne);
+      __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
       break;
     }
     default:
@@ -2448,7 +2413,6 @@ void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   Register res = RegisterFrom(locations->Out(), instruction->GetType());
   IfCondition if_cond = instruction->GetCondition();
-  Condition arm64_cond = ARM64Condition(if_cond);
 
   if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
     FPRegister lhs = InputFPRegisterAt(instruction, 0);
@@ -2459,20 +2423,13 @@ void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
     } else {
       __ Fcmp(lhs, InputFPRegisterAt(instruction, 1));
     }
-    __ Cset(res, arm64_cond);
-    if (instruction->IsFPConditionTrueIfNaN()) {
-      // res = IsUnordered(arm64_cond) ? 1 : res  <=>  res = IsNotUnordered(arm64_cond) ? res : 1
-      __ Csel(res, res, Operand(1), vc);  // VC for "not unordered".
-    } else if (instruction->IsFPConditionFalseIfNaN()) {
-      // res = IsUnordered(arm64_cond) ? 0 : res  <=>  res = IsNotUnordered(arm64_cond) ? res : 0
-      __ Csel(res, res, Operand(0), vc);  // VC for "not unordered".
-    }
+    __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
   } else {
     // Integer cases.
     Register lhs = InputRegisterAt(instruction, 0);
     Operand rhs = InputOperandAt(instruction, 1);
     __ Cmp(lhs, rhs);
-    __ Cset(res, arm64_cond);
+    __ Cset(res, ARM64Condition(if_cond));
   }
 }
 
@@ -2842,15 +2799,11 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct
     } else {
       __ Fcmp(lhs, InputFPRegisterAt(condition, 1));
     }
-    if (condition->IsFPConditionTrueIfNaN()) {
-      __ B(vs, true_target == nullptr ? &fallthrough_target : true_target);
-    } else if (condition->IsFPConditionFalseIfNaN()) {
-      __ B(vs, false_target == nullptr ? &fallthrough_target : false_target);
-    }
     if (true_target == nullptr) {
-      __ B(ARM64Condition(condition->GetOppositeCondition()), false_target);
+      IfCondition opposite_condition = condition->GetOppositeCondition();
+      __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
     } else {
-      __ B(ARM64Condition(condition->GetCondition()), true_target);
+      __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
     }
   } else {
     // Integer cases.
@@ -3488,9 +3441,9 @@ void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
 }
 
 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
   if (intrinsic.TryDispatch(invoke)) {
@@ -3738,9 +3691,9 @@ vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral(
 
 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index f2ff89488e..8eb9fcc558 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -339,10 +339,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index e34767cecd..5bd136a3f0 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1042,7 +1042,7 @@ void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
   __ Bind(&done);
 }
 
-void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorMIPS::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[A1_A2] = true;
 
@@ -1072,16 +1072,6 @@ void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const {
     blocked_fpu_registers_[i] = true;
   }
 
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-
-    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
-    }
-  }
-
   UpdateBlockedPairRegisters();
 }
 
@@ -1096,52 +1086,6 @@ void CodeGeneratorMIPS::UpdateBlockedPairRegisters() const {
   }
 }
 
-Location CodeGeneratorMIPS::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      MipsManagedRegister pair =
-          MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        MipsManagedRegister current =
-            MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  UNREACHABLE();
-}
-
 size_t CodeGeneratorMIPS::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreWord, Register(reg_id), SP, stack_index);
   return kMipsWordSize;
@@ -3835,9 +3779,9 @@ void LocationsBuilderMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
 }
 
 void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -3973,9 +3917,9 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
 }
 
 void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index c3d4851ee9..2cde0ed90b 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -290,10 +290,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 79cd56d698..05054867fe 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -979,7 +979,7 @@ void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) {
   __ Bind(&done);
 }
 
-void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
+void CodeGeneratorMIPS64::SetupBlockedRegisters() const {
   // ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated.
   blocked_core_registers_[ZERO] = true;
   blocked_core_registers_[K0] = true;
@@ -1003,8 +1003,7 @@ void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSE
 
   // TODO: review; anything else?
 
-  // TODO: make these two for's conditional on is_baseline once
-  // all the issues with register saving/restoring are sorted out.
+  // TODO: remove once all the issues with register saving/restoring are sorted out.
   for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
     blocked_core_registers_[kCoreCalleeSaves[i]] = true;
   }
@@ -1014,20 +1013,6 @@ void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSE
   }
 }
 
-Location CodeGeneratorMIPS64::AllocateFreeRegister(Primitive::Type type) const {
-  if (type == Primitive::kPrimVoid) {
-    LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  if (Primitive::IsFloatingPointType(type)) {
-    size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFpuRegisters);
-    return Location::FpuRegisterLocation(reg);
-  } else {
-    size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfGpuRegisters);
-    return Location::RegisterLocation(reg);
-  }
-}
-
 size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index);
   return kMips64WordSize;
@@ -3031,9 +3016,9 @@ void LocationsBuilderMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
 }
 
 void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -3182,9 +3167,9 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 7182e8e987..140ff95f14 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -289,10 +289,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6259acded3..f7ccdd8b8f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -817,65 +817,13 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
-Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      X86ManagedRegister pair =
-          X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      Register reg = static_cast<Register>(
-          FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters));
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        X86ManagedRegister current =
-            X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      return Location::FpuRegisterLocation(
-          FindFreeEntry(blocked_fpu_registers_, kNumberOfXmmRegisters));
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location::NoLocation();
-}
-
-void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorX86::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[ECX_EDX] = true;
 
   // Stack register is always reserved.
   blocked_core_registers_[ESP] = true;
 
-  if (is_baseline) {
-    blocked_core_registers_[EBP] = true;
-    blocked_core_registers_[ESI] = true;
-    blocked_core_registers_[EDI] = true;
-  }
-
   UpdateBlockedPairRegisters();
 }
 
@@ -1981,9 +1929,9 @@ void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invok
 }
 
 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -1999,17 +1947,6 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok
   if (invoke->HasPcRelativeDexCache()) {
     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
   }
-
-  if (codegen_->IsBaseline()) {
-    // Baseline does not have enough registers if the current method also
-    // needs a register. We therefore do not require a register for it, and let
-    // the code generation of the invoke handle it.
-    LocationSummary* locations = invoke->GetLocations();
-    Location location = locations->InAt(invoke->GetSpecialInputIndex());
-    if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-      locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
-    }
-  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
@@ -2022,9 +1959,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen)
 }
 
 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; @@ -4286,7 +4223,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, if (current_method.IsRegister()) { method_reg = current_method.AsRegister<Register>(); } else { - DCHECK(IsBaseline() || invoke->GetLocations()->Intrinsified()); + DCHECK(invoke->GetLocations()->Intrinsified()); DCHECK(!current_method.IsValid()); method_reg = reg; __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); @@ -5076,11 +5013,6 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { - // This location builder might end up asking to up to four registers, which is - // not currently possible for baseline. The situation in which we need four - // registers cannot be met by baseline though, because it has not run any - // optimization. - Primitive::Type value_type = instruction->GetComponentType(); bool needs_write_barrier = @@ -6077,7 +6009,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: { // Note that we indeed only call on slow path, but we always go - // into the slow path for the unresolved & interface check + // into the slow path for the unresolved and interface check // cases. // // We cannot directly call the InstanceofNonTrivial runtime @@ -6308,8 +6240,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - // We always go into the type check slow path for the unresolved & - // interface check cases. + // We always go into the type check slow path for the unresolved + // and interface check cases. // // We cannot directly call the CheckCast runtime entry point // without resorting to a type checking slow path here (i.e. by @@ -6588,6 +6520,8 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct // Plain GC root load with no read barrier. // /* GcRoot<mirror::Object> */ root = *(obj + offset) __ movl(root_reg, Address(obj, offset)); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. } } @@ -6650,7 +6584,9 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // Note: the original implementation in ReadBarrier::Barrier is // slightly more complex as: // - it implements the load-load fence using a data dependency on - // the high-bits of rb_state, which are expected to be all zeroes; + // the high-bits of rb_state, which are expected to be all zeroes + // (we use CodeGeneratorX86::GenerateMemoryBarrier instead here, + // which is a no-op thanks to the x86 memory model); // - it performs additional checks that we do not do here for // performance reasons. 
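
Aside on the x86 hunks above: with the baseline allocator gone, SetupBlockedRegisters() no longer takes an is_baseline flag, and it reserves only registers that must never be handed out (the stack pointer, plus the Dalvik-style ECX_EDX pair). The sketch below shows the shape of that contract; it is an illustration only, not the ART sources, and the register count and reserved index are hypothetical.

    // Illustrative sketch, not ART code: how SetupBlockedRegisters()
    // communicates reserved registers to the register allocator.
    #include <cstddef>

    constexpr std::size_t kNumberOfCpuRegisters = 8;  // hypothetical count

    struct CodeGeneratorSketch {
      bool blocked_core_registers_[kNumberOfCpuRegisters] = {};

      // Post-patch contract: block only what can never be allocated
      // (e.g. the stack pointer); there is no baseline special case.
      void SetupBlockedRegisters() {
        blocked_core_registers_[4] = true;  // stand-in for ESP
      }

      // The allocator skips every slot marked true.
      int FirstFreeRegister() const {
        for (std::size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
          if (!blocked_core_registers_[i]) {
            return static_cast<int>(i);
          }
        }
        return -1;  // no free register left
      }
    };
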
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index c65c423eae..43e9543e41 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -359,9 +359,7 @@ class CodeGeneratorX86 : public CodeGenerator { return GetLabelOf(block)->Position(); } - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; - - Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; + void SetupBlockedRegisters() const OVERRIDE; Location GetStackLocation(HLoadLocal* load) const OVERRIDE; @@ -453,7 +451,7 @@ class CodeGeneratorX86 : public CodeGenerator { // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, - Location out, + Location ref, Register obj, uint32_t offset, Location temp, @@ -461,7 +459,7 @@ class CodeGeneratorX86 : public CodeGenerator { // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, - Location out, + Location ref, Register obj, uint32_t data_offset, Location index, diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index e024ce2b6c..2ce2d91502 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1002,47 +1002,12 @@ InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, assembler_(codegen->GetAssembler()), codegen_(codegen) {} -Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const { - switch (type) { - case Primitive::kPrimLong: - case Primitive::kPrimByte: - case Primitive::kPrimBoolean: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters); - return Location::RegisterLocation(reg); - } - - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFloatRegisters); - return Location::FpuRegisterLocation(reg); - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << type; - } - - return Location::NoLocation(); -} - -void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const { +void CodeGeneratorX86_64::SetupBlockedRegisters() const { // Stack register is always reserved. blocked_core_registers_[RSP] = true; // Block the register used as TMP. blocked_core_registers_[TMP] = true; - - if (is_baseline) { - for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { - blocked_core_registers_[kCoreCalleeSaves[i]] = true; - } - for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { - blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; - } - } } static dwarf::Reg DWARFReg(Register reg) { @@ -2161,9 +2126,9 @@ void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* in } void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. 
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { @@ -2183,9 +2148,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codeg } void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; @@ -4698,13 +4663,13 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool object_array_set_with_read_barrier = kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - (may_need_runtime_call || object_array_set_with_read_barrier) ? + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); @@ -4733,7 +4698,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Location index = locations->InAt(1); Location value = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -4785,7 +4750,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movl(address, Immediate(0)); codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); - DCHECK(!may_need_runtime_call); + DCHECK(!may_need_runtime_call_for_type_check); break; } @@ -4794,7 +4759,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { NearLabel done, not_null, do_put; SlowPathCode* slow_path = nullptr; CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -4872,7 +4837,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { } else { __ movl(address, register_value); } - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -5661,7 +5626,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: { // Note that we indeed only call on slow path, but we always go - // into the slow path for the unresolved & interface check + // into the slow path for the unresolved and interface check // cases. 
// // We cannot directly call the InstanceofNonTrivial runtime @@ -5892,8 +5857,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - // We always go into the type check slow path for the unresolved & - // interface check cases. + // We always go into the type check slow path for the unresolved + // and interface check cases. // // We cannot directly call the CheckCast runtime entry point // without resorting to a type checking slow path here (i.e. by @@ -6155,6 +6120,8 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr // Plain GC root load with no read barrier. // /* GcRoot<mirror::Object> */ root = *(obj + offset) __ movl(root_reg, Address(obj, offset)); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. } } @@ -6217,7 +6184,9 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // Note: the original implementation in ReadBarrier::Barrier is // slightly more complex as: // - it implements the load-load fence using a data dependency on - // the high-bits of rb_state, which are expected to be all zeroes; + // the high-bits of rb_state, which are expected to be all zeroes + // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead + // here, which is a no-op thanks to the x86-64 memory model); // - it performs additional checks that we do not do here for // performance reasons. diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 505c9dcdad..82aabb04d3 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -347,8 +347,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location GetStackLocation(HLoadLocal* load) const OVERRIDE; - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; - Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; + void SetupBlockedRegisters() const OVERRIDE; void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -401,7 +400,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, - Location out, + Location ref, CpuRegister obj, uint32_t offset, Location temp, @@ -409,7 +408,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. 
void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, - Location out, + Location ref, CpuRegister obj, uint32_t data_offset, Location index, diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index d970704368..19d63de499 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -40,6 +40,7 @@ #include "dex_file.h" #include "dex_instruction.h" #include "driver/compiler_options.h" +#include "graph_checker.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" @@ -70,8 +71,8 @@ class TestCodeGeneratorARM : public arm::CodeGeneratorARM { AddAllocatedRegister(Location::RegisterLocation(arm::R7)); } - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE { - arm::CodeGeneratorARM::SetupBlockedRegisters(is_baseline); + void SetupBlockedRegisters() const OVERRIDE { + arm::CodeGeneratorARM::SetupBlockedRegisters(); blocked_core_registers_[arm::R4] = true; blocked_core_registers_[arm::R6] = false; blocked_core_registers_[arm::R7] = false; @@ -90,8 +91,8 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { AddAllocatedRegister(Location::RegisterLocation(x86::EDI)); } - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE { - x86::CodeGeneratorX86::SetupBlockedRegisters(is_baseline); + void SetupBlockedRegisters() const OVERRIDE { + x86::CodeGeneratorX86::SetupBlockedRegisters(); // ebx is a callee-save register in C, but caller-save for ART. blocked_core_registers_[x86::EBX] = true; blocked_register_pairs_[x86::EAX_EBX] = true; @@ -200,259 +201,228 @@ static void Run(const InternalCodeAllocator& allocator, } template <typename Expected> -static void RunCodeBaseline(InstructionSet target_isa, - HGraph* graph, - bool has_result, - Expected expected) { - InternalCodeAllocator allocator; - - CompilerOptions compiler_options; - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); - // We avoid doing a stack overflow check that requires the runtime being setup, - // by making sure the compiler knows the methods we are running are leaf methods. 
- codegenX86.CompileBaseline(&allocator, true); - if (target_isa == kX86) { - Run(allocator, codegenX86, has_result, expected); - } +static void RunCode(CodeGenerator* codegen, + HGraph* graph, + std::function<void(HGraph*)> hook_before_codegen, + bool has_result, + Expected expected) { + ASSERT_TRUE(graph->IsInSsaForm()); - std::unique_ptr<const ArmInstructionSetFeatures> features_arm( - ArmInstructionSetFeatures::FromCppDefines()); - TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options); - codegenARM.CompileBaseline(&allocator, true); - if (target_isa == kArm || target_isa == kThumb2) { - Run(allocator, codegenARM, has_result, expected); - } - - std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( - X86_64InstructionSetFeatures::FromCppDefines()); - x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); - codegenX86_64.CompileBaseline(&allocator, true); - if (target_isa == kX86_64) { - Run(allocator, codegenX86_64, has_result, expected); - } - - std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( - Arm64InstructionSetFeatures::FromCppDefines()); - arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options); - codegenARM64.CompileBaseline(&allocator, true); - if (target_isa == kArm64) { - Run(allocator, codegenARM64, has_result, expected); - } - - std::unique_ptr<const MipsInstructionSetFeatures> features_mips( - MipsInstructionSetFeatures::FromCppDefines()); - mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options); - codegenMIPS.CompileBaseline(&allocator, true); - if (kRuntimeISA == kMips) { - Run(allocator, codegenMIPS, has_result, expected); - } - - std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( - Mips64InstructionSetFeatures::FromCppDefines()); - mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options); - codegenMIPS64.CompileBaseline(&allocator, true); - if (target_isa == kMips64) { - Run(allocator, codegenMIPS64, has_result, expected); - } -} + SSAChecker graph_checker(graph); + graph_checker.Run(); + ASSERT_TRUE(graph_checker.IsValid()); -template <typename Expected> -static void RunCodeOptimized(CodeGenerator* codegen, - HGraph* graph, - std::function<void(HGraph*)> hook_before_codegen, - bool has_result, - Expected expected) { - // Tests may have already computed it. 
- if (graph->GetReversePostOrder().empty()) { - graph->BuildDominatorTree(); - } SsaLivenessAnalysis liveness(graph, codegen); - liveness.Analyze(); - RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness); - register_allocator.AllocateRegisters(); + PrepareForRegisterAllocation(graph).Run(); + liveness.Analyze(); + RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters(); hook_before_codegen(graph); InternalCodeAllocator allocator; - codegen->CompileOptimized(&allocator); + codegen->Compile(&allocator); Run(allocator, *codegen, has_result, expected); } template <typename Expected> -static void RunCodeOptimized(InstructionSet target_isa, - HGraph* graph, - std::function<void(HGraph*)> hook_before_codegen, - bool has_result, - Expected expected) { +static void RunCode(InstructionSet target_isa, + HGraph* graph, + std::function<void(HGraph*)> hook_before_codegen, + bool has_result, + Expected expected) { CompilerOptions compiler_options; if (target_isa == kArm || target_isa == kThumb2) { std::unique_ptr<const ArmInstructionSetFeatures> features_arm( ArmInstructionSetFeatures::FromCppDefines()); TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options); - RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kArm64) { std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( Arm64InstructionSetFeatures::FromCppDefines()); arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options); - RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kX86) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); - RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kX86_64) { std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( X86_64InstructionSetFeatures::FromCppDefines()); x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); - RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kMips) { std::unique_ptr<const MipsInstructionSetFeatures> features_mips( MipsInstructionSetFeatures::FromCppDefines()); mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options); - RunCodeOptimized(&codegenMIPS, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kMips64) { std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( Mips64InstructionSetFeatures::FromCppDefines()); mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options); - RunCodeOptimized(&codegenMIPS64, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected); } } -static void TestCode(InstructionSet target_isa, - const uint16_t* data, +static ::std::vector<InstructionSet> GetTargetISAs() { + ::std::vector<InstructionSet> v; + // Add all 
ISAs that are executable on hardware or on simulator. + const ::std::vector<InstructionSet> executable_isa_candidates = { + kArm, + kArm64, + kThumb2, + kX86, + kX86_64, + kMips, + kMips64 + }; + + for (auto target_isa : executable_isa_candidates) { + if (CanExecute(target_isa)) { + v.push_back(target_isa); + } + } + + return v; +} + +static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) { - ArenaPool pool; - ArenaAllocator arena(&pool); - HGraph* graph = CreateGraph(&arena); - HGraphBuilder builder(graph); - const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - bool graph_built = builder.BuildGraph(*item); - ASSERT_TRUE(graph_built); - // Remove suspend checks, they cannot be executed in this context. - RemoveSuspendChecks(graph); - RunCodeBaseline(target_isa, graph, has_result, expected); -} - -static void TestCodeLong(InstructionSet target_isa, - const uint16_t* data, + for (InstructionSet target_isa : GetTargetISAs()) { + ArenaPool pool; + ArenaAllocator arena(&pool); + HGraph* graph = CreateGraph(&arena); + HGraphBuilder builder(graph); + const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); + bool graph_built = builder.BuildGraph(*item); + ASSERT_TRUE(graph_built); + // Remove suspend checks, they cannot be executed in this context. + RemoveSuspendChecks(graph); + TransformToSsa(graph); + RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected); + } +} + +static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) { - ArenaPool pool; - ArenaAllocator arena(&pool); - HGraph* graph = CreateGraph(&arena); - HGraphBuilder builder(graph, Primitive::kPrimLong); - const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - bool graph_built = builder.BuildGraph(*item); - ASSERT_TRUE(graph_built); - // Remove suspend checks, they cannot be executed in this context. - RemoveSuspendChecks(graph); - RunCodeBaseline(target_isa, graph, has_result, expected); + for (InstructionSet target_isa : GetTargetISAs()) { + ArenaPool pool; + ArenaAllocator arena(&pool); + HGraph* graph = CreateGraph(&arena); + HGraphBuilder builder(graph, Primitive::kPrimLong); + const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); + bool graph_built = builder.BuildGraph(*item); + ASSERT_TRUE(graph_built); + // Remove suspend checks, they cannot be executed in this context. 
+ RemoveSuspendChecks(graph); + TransformToSsa(graph); + RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected); + } } -class CodegenTest: public ::testing::TestWithParam<InstructionSet> {}; +class CodegenTest : public CommonCompilerTest {}; -TEST_P(CodegenTest, ReturnVoid) { +TEST_F(CodegenTest, ReturnVoid) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, CFG1) { +TEST_F(CodegenTest, CFG1) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, CFG2) { +TEST_F(CodegenTest, CFG2) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::GOTO | 0x100, Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, CFG3) { +TEST_F(CodegenTest, CFG3) { const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, Instruction::RETURN_VOID, Instruction::GOTO | 0xFF00); - TestCode(GetParam(), data1); + TestCode(data1); const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_16, 3, Instruction::RETURN_VOID, Instruction::GOTO_16, 0xFFFF); - TestCode(GetParam(), data2); + TestCode(data2); const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 4, 0, Instruction::RETURN_VOID, Instruction::GOTO_32, 0xFFFF, 0xFFFF); - TestCode(GetParam(), data3); + TestCode(data3); } -TEST_P(CodegenTest, CFG4) { +TEST_F(CodegenTest, CFG4) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, Instruction::GOTO | 0x100, Instruction::GOTO | 0xFE00); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, CFG5) { +TEST_F(CodegenTest, CFG5) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, IntConstant) { +TEST_F(CodegenTest, IntConstant) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, Return1) { +TEST_F(CodegenTest, Return1) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN | 0); - TestCode(GetParam(), data, true, 0); + TestCode(data, true, 0); } -TEST_P(CodegenTest, Return2) { +TEST_F(CodegenTest, Return2) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 0 | 1 << 8, Instruction::RETURN | 1 << 8); - TestCode(GetParam(), data, true, 0); + TestCode(data, true, 0); } -TEST_P(CodegenTest, Return3) { +TEST_F(CodegenTest, Return3) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::RETURN | 1 << 8); - TestCode(GetParam(), data, true, 1); + TestCode(data, true, 1); } -TEST_P(CodegenTest, ReturnIf1) { +TEST_F(CodegenTest, ReturnIf1) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, @@ -460,10 +430,10 @@ TEST_P(CodegenTest, ReturnIf1) { Instruction::RETURN | 0 << 8, Instruction::RETURN | 1 << 8); - TestCode(GetParam(), data, true, 1); + TestCode(data, true, 1); } -TEST_P(CodegenTest, ReturnIf2) { +TEST_F(CodegenTest, ReturnIf2) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 
0, Instruction::CONST_4 | 1 << 8 | 1 << 12, @@ -471,12 +441,12 @@ TEST_P(CodegenTest, ReturnIf2) { Instruction::RETURN | 0 << 8, Instruction::RETURN | 1 << 8); - TestCode(GetParam(), data, true, 0); + TestCode(data, true, 0); } // Exercise bit-wise (one's complement) not-int instruction. #define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \ -TEST_P(CodegenTest, TEST_NAME) { \ +TEST_F(CodegenTest, TEST_NAME) { \ const int32_t input = INPUT; \ const uint16_t input_lo = Low16Bits(input); \ const uint16_t input_hi = High16Bits(input); \ @@ -485,7 +455,7 @@ TEST_P(CodegenTest, TEST_NAME) { \ Instruction::NOT_INT | 1 << 8 | 0 << 12 , \ Instruction::RETURN | 1 << 8); \ \ - TestCode(GetParam(), data, true, EXPECTED_OUTPUT); \ + TestCode(data, true, EXPECTED_OUTPUT); \ } NOT_INT_TEST(ReturnNotIntMinus2, -2, 1) @@ -501,7 +471,7 @@ NOT_INT_TEST(ReturnNotIntINT32_MAX, 2147483647, -2147483648) // -(2^31) // Exercise bit-wise (one's complement) not-long instruction. #define NOT_LONG_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \ -TEST_P(CodegenTest, TEST_NAME) { \ +TEST_F(CodegenTest, TEST_NAME) { \ const int64_t input = INPUT; \ const uint16_t word0 = Low16Bits(Low32Bits(input)); /* LSW. */ \ const uint16_t word1 = High16Bits(Low32Bits(input)); \ @@ -512,7 +482,7 @@ TEST_P(CodegenTest, TEST_NAME) { \ Instruction::NOT_LONG | 2 << 8 | 0 << 12, \ Instruction::RETURN_WIDE | 2 << 8); \ \ - TestCodeLong(GetParam(), data, true, EXPECTED_OUTPUT); \ + TestCodeLong(data, true, EXPECTED_OUTPUT); \ } NOT_LONG_TEST(ReturnNotLongMinus2, INT64_C(-2), INT64_C(1)) @@ -551,7 +521,7 @@ NOT_LONG_TEST(ReturnNotLongINT64_MAX, #undef NOT_LONG_TEST -TEST_P(CodegenTest, IntToLongOfLongToInt) { +TEST_F(CodegenTest, IntToLongOfLongToInt) { const int64_t input = INT64_C(4294967296); // 2^32 const uint16_t word0 = Low16Bits(Low32Bits(input)); // LSW. 
const uint16_t word1 = High16Bits(Low32Bits(input)); @@ -565,192 +535,146 @@ TEST_P(CodegenTest, IntToLongOfLongToInt) { Instruction::INT_TO_LONG | 2 << 8 | 4 << 12, Instruction::RETURN_WIDE | 2 << 8); - TestCodeLong(GetParam(), data, true, 1); + TestCodeLong(data, true, 1); } -TEST_P(CodegenTest, ReturnAdd1) { +TEST_F(CodegenTest, ReturnAdd1) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT, 1 << 8 | 0, Instruction::RETURN); - TestCode(GetParam(), data, true, 7); + TestCode(data, true, 7); } -TEST_P(CodegenTest, ReturnAdd2) { +TEST_F(CodegenTest, ReturnAdd2) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT_2ADDR | 1 << 12, Instruction::RETURN); - TestCode(GetParam(), data, true, 7); + TestCode(data, true, 7); } -TEST_P(CodegenTest, ReturnAdd3) { +TEST_F(CodegenTest, ReturnAdd3) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::ADD_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); - TestCode(GetParam(), data, true, 7); + TestCode(data, true, 7); } -TEST_P(CodegenTest, ReturnAdd4) { +TEST_F(CodegenTest, ReturnAdd4) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::ADD_INT_LIT16, 3, Instruction::RETURN); - TestCode(GetParam(), data, true, 7); -} - -TEST_P(CodegenTest, NonMaterializedCondition) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); - graph->AddBlock(entry); - graph->SetEntryBlock(entry); - entry->AddInstruction(new (&allocator) HGoto()); - - HBasicBlock* first_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(first_block); - entry->AddSuccessor(first_block); - HIntConstant* constant0 = graph->GetIntConstant(0); - HIntConstant* constant1 = graph->GetIntConstant(1); - HEqual* equal = new (&allocator) HEqual(constant0, constant0); - first_block->AddInstruction(equal); - first_block->AddInstruction(new (&allocator) HIf(equal)); - - HBasicBlock* then = new (&allocator) HBasicBlock(graph); - HBasicBlock* else_ = new (&allocator) HBasicBlock(graph); - HBasicBlock* exit = new (&allocator) HBasicBlock(graph); - - graph->AddBlock(then); - graph->AddBlock(else_); - graph->AddBlock(exit); - first_block->AddSuccessor(then); - first_block->AddSuccessor(else_); - then->AddSuccessor(exit); - else_->AddSuccessor(exit); - - exit->AddInstruction(new (&allocator) HExit()); - then->AddInstruction(new (&allocator) HReturn(constant0)); - else_->AddInstruction(new (&allocator) HReturn(constant1)); - - ASSERT_TRUE(equal->NeedsMaterialization()); - graph->BuildDominatorTree(); - PrepareForRegisterAllocation(graph).Run(); - ASSERT_FALSE(equal->NeedsMaterialization()); - - auto hook_before_codegen = [](HGraph* graph_in) { - HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; - HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); - block->InsertInstructionBefore(move, block->GetLastInstruction()); - }; - - RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, 0); + TestCode(data, true, 7); } -TEST_P(CodegenTest, ReturnMulInt) { +TEST_F(CodegenTest, ReturnMulInt) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::MUL_INT, 1 << 8 | 0, 
Instruction::RETURN); - TestCode(GetParam(), data, true, 12); + TestCode(data, true, 12); } -TEST_P(CodegenTest, ReturnMulInt2addr) { +TEST_F(CodegenTest, ReturnMulInt2addr) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::MUL_INT_2ADDR | 1 << 12, Instruction::RETURN); - TestCode(GetParam(), data, true, 12); + TestCode(data, true, 12); } -TEST_P(CodegenTest, ReturnMulLong) { +TEST_F(CodegenTest, ReturnMulLong) { const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( - Instruction::CONST_4 | 3 << 12 | 0, - Instruction::CONST_4 | 0 << 12 | 1 << 8, - Instruction::CONST_4 | 4 << 12 | 2 << 8, - Instruction::CONST_4 | 0 << 12 | 3 << 8, + Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0, + Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0, Instruction::MUL_LONG, 2 << 8 | 0, Instruction::RETURN_WIDE); - TestCodeLong(GetParam(), data, true, 12); + TestCodeLong(data, true, 12); } -TEST_P(CodegenTest, ReturnMulLong2addr) { +TEST_F(CodegenTest, ReturnMulLong2addr) { const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( - Instruction::CONST_4 | 3 << 12 | 0 << 8, - Instruction::CONST_4 | 0 << 12 | 1 << 8, - Instruction::CONST_4 | 4 << 12 | 2 << 8, - Instruction::CONST_4 | 0 << 12 | 3 << 8, + Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0, + Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0, Instruction::MUL_LONG_2ADDR | 2 << 12, Instruction::RETURN_WIDE); - TestCodeLong(GetParam(), data, true, 12); + TestCodeLong(data, true, 12); } -TEST_P(CodegenTest, ReturnMulIntLit8) { +TEST_F(CodegenTest, ReturnMulIntLit8) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::MUL_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); - TestCode(GetParam(), data, true, 12); + TestCode(data, true, 12); } -TEST_P(CodegenTest, ReturnMulIntLit16) { +TEST_F(CodegenTest, ReturnMulIntLit16) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::MUL_INT_LIT16, 3, Instruction::RETURN); - TestCode(GetParam(), data, true, 12); + TestCode(data, true, 12); } -TEST_P(CodegenTest, MaterializedCondition1) { - // Check that condition are materialized correctly. A materialized condition - // should yield `1` if it evaluated to true, and `0` otherwise. - // We force the materialization of comparisons for different combinations of - // inputs and check the results. 
- - int lhs[] = {1, 2, -1, 2, 0xabc}; - int rhs[] = {2, 1, 2, -1, 0xabc}; - - for (size_t i = 0; i < arraysize(lhs); i++) { +TEST_F(CodegenTest, NonMaterializedCondition) { + for (InstructionSet target_isa : GetTargetISAs()) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(entry_block); - graph->SetEntryBlock(entry_block); - entry_block->AddInstruction(new (&allocator) HGoto()); - HBasicBlock* code_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(code_block); + HGraph* graph = CreateGraph(&allocator); + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + entry->AddInstruction(new (&allocator) HGoto()); + + HBasicBlock* first_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(first_block); + entry->AddSuccessor(first_block); + HIntConstant* constant0 = graph->GetIntConstant(0); + HIntConstant* constant1 = graph->GetIntConstant(1); + HEqual* equal = new (&allocator) HEqual(constant0, constant0); + first_block->AddInstruction(equal); + first_block->AddInstruction(new (&allocator) HIf(equal)); + + HBasicBlock* then_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* else_block = new (&allocator) HBasicBlock(graph); HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + graph->SetExitBlock(exit_block); + + graph->AddBlock(then_block); + graph->AddBlock(else_block); graph->AddBlock(exit_block); - exit_block->AddInstruction(new (&allocator) HExit()); + first_block->AddSuccessor(then_block); + first_block->AddSuccessor(else_block); + then_block->AddSuccessor(exit_block); + else_block->AddSuccessor(exit_block); - entry_block->AddSuccessor(code_block); - code_block->AddSuccessor(exit_block); - graph->SetExitBlock(exit_block); + exit_block->AddInstruction(new (&allocator) HExit()); + then_block->AddInstruction(new (&allocator) HReturn(constant0)); + else_block->AddInstruction(new (&allocator) HReturn(constant1)); - HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]); - HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]); - HLessThan cmp_lt(cst_lhs, cst_rhs); - code_block->AddInstruction(&cmp_lt); - HReturn ret(&cmp_lt); - code_block->AddInstruction(&ret); + ASSERT_TRUE(equal->NeedsMaterialization()); + TransformToSsa(graph); + PrepareForRegisterAllocation(graph).Run(); + ASSERT_FALSE(equal->NeedsMaterialization()); auto hook_before_codegen = [](HGraph* graph_in) { HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; @@ -758,93 +682,143 @@ TEST_P(CodegenTest, MaterializedCondition1) { block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]); + RunCode(target_isa, graph, hook_before_codegen, true, 0); } } -TEST_P(CodegenTest, MaterializedCondition2) { - // Check that HIf correctly interprets a materialized condition. - // We force the materialization of comparisons for different combinations of - // inputs. An HIf takes the materialized combination as input and returns a - // value that we verify. 
- - int lhs[] = {1, 2, -1, 2, 0xabc}; - int rhs[] = {2, 1, 2, -1, 0xabc}; - - - for (size_t i = 0; i < arraysize(lhs); i++) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); - - HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(entry_block); - graph->SetEntryBlock(entry_block); - entry_block->AddInstruction(new (&allocator) HGoto()); - - HBasicBlock* if_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(if_block); - HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(if_true_block); - HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(if_false_block); - HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(exit_block); - exit_block->AddInstruction(new (&allocator) HExit()); - - graph->SetEntryBlock(entry_block); - entry_block->AddSuccessor(if_block); - if_block->AddSuccessor(if_true_block); - if_block->AddSuccessor(if_false_block); - if_true_block->AddSuccessor(exit_block); - if_false_block->AddSuccessor(exit_block); - graph->SetExitBlock(exit_block); - - HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]); - HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]); - HLessThan cmp_lt(cst_lhs, cst_rhs); - if_block->AddInstruction(&cmp_lt); - // We insert a temporary to separate the HIf from the HLessThan and force - the materialization of the condition. - HTemporary force_materialization(0); - if_block->AddInstruction(&force_materialization); - HIf if_lt(&cmp_lt); - if_block->AddInstruction(&if_lt); - - HIntConstant* cst_lt = graph->GetIntConstant(1); - HReturn ret_lt(cst_lt); - if_true_block->AddInstruction(&ret_lt); - HIntConstant* cst_ge = graph->GetIntConstant(0); - HReturn ret_ge(cst_ge); - if_false_block->AddInstruction(&ret_ge); - - auto hook_before_codegen = [](HGraph* graph_in) { - HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; - HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); - block->InsertInstructionBefore(move, block->GetLastInstruction()); - }; +TEST_F(CodegenTest, MaterializedCondition1) { + for (InstructionSet target_isa : GetTargetISAs()) { + // Check that conditions are materialized correctly. A materialized condition + should yield `1` if it evaluated to true, and `0` otherwise. + // We force the materialization of comparisons for different combinations of + + inputs and check the results. 
+ + int lhs[] = {1, 2, -1, 2, 0xabc}; + int rhs[] = {2, 1, 2, -1, 0xabc}; + + for (size_t i = 0; i < arraysize(lhs); i++) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = CreateGraph(&allocator); + + HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry_block); + graph->SetEntryBlock(entry_block); + entry_block->AddInstruction(new (&allocator) HGoto()); + HBasicBlock* code_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(code_block); + HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(exit_block); + exit_block->AddInstruction(new (&allocator) HExit()); + + entry_block->AddSuccessor(code_block); + code_block->AddSuccessor(exit_block); + graph->SetExitBlock(exit_block); + + HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]); + HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]); + HLessThan cmp_lt(cst_lhs, cst_rhs); + code_block->AddInstruction(&cmp_lt); + HReturn ret(&cmp_lt); + code_block->AddInstruction(&ret); + + TransformToSsa(graph); + auto hook_before_codegen = [](HGraph* graph_in) { + HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; + HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); + block->InsertInstructionBefore(move, block->GetLastInstruction()); + }; + RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]); + } + } +} - RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]); +TEST_F(CodegenTest, MaterializedCondition2) { + for (InstructionSet target_isa : GetTargetISAs()) { + // Check that HIf correctly interprets a materialized condition. + // We force the materialization of comparisons for different combinations of + // inputs. An HIf takes the materialized combination as input and returns a + // value that we verify. + + int lhs[] = {1, 2, -1, 2, 0xabc}; + int rhs[] = {2, 1, 2, -1, 0xabc}; + + + for (size_t i = 0; i < arraysize(lhs); i++) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = CreateGraph(&allocator); + + HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry_block); + graph->SetEntryBlock(entry_block); + entry_block->AddInstruction(new (&allocator) HGoto()); + + HBasicBlock* if_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(if_block); + HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(if_true_block); + HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(if_false_block); + HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(exit_block); + exit_block->AddInstruction(new (&allocator) HExit()); + + graph->SetEntryBlock(entry_block); + entry_block->AddSuccessor(if_block); + if_block->AddSuccessor(if_true_block); + if_block->AddSuccessor(if_false_block); + if_true_block->AddSuccessor(exit_block); + if_false_block->AddSuccessor(exit_block); + graph->SetExitBlock(exit_block); + + HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]); + HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]); + HLessThan cmp_lt(cst_lhs, cst_rhs); + if_block->AddInstruction(&cmp_lt); + // We insert a temporary to separate the HIf from the HLessThan and force + // the materialization of the condition. 
+ HTemporary force_materialization(0); + if_block->AddInstruction(&force_materialization); + HIf if_lt(&cmp_lt); + if_block->AddInstruction(&if_lt); + + HIntConstant* cst_lt = graph->GetIntConstant(1); + HReturn ret_lt(cst_lt); + if_true_block->AddInstruction(&ret_lt); + HIntConstant* cst_ge = graph->GetIntConstant(0); + HReturn ret_ge(cst_ge); + if_false_block->AddInstruction(&ret_ge); + + TransformToSsa(graph); + auto hook_before_codegen = [](HGraph* graph_in) { + HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; + HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); + block->InsertInstructionBefore(move, block->GetLastInstruction()); + }; + RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]); + } } } -TEST_P(CodegenTest, ReturnDivIntLit8) { +TEST_F(CodegenTest, ReturnDivIntLit8) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::DIV_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); - TestCode(GetParam(), data, true, 1); + TestCode(data, true, 1); } -TEST_P(CodegenTest, ReturnDivInt2Addr) { +TEST_F(CodegenTest, ReturnDivInt2Addr) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0, Instruction::CONST_4 | 2 << 12 | 1 << 8, Instruction::DIV_INT_2ADDR | 1 << 12, Instruction::RETURN); - TestCode(GetParam(), data, true, 2); + TestCode(data, true, 2); } // Helper method. @@ -933,80 +907,55 @@ static void TestComparison(IfCondition condition, block->AddInstruction(comparison); block->AddInstruction(new (&allocator) HReturn(comparison)); - auto hook_before_codegen = [](HGraph*) { - }; - RunCodeOptimized(target_isa, graph, hook_before_codegen, true, expected_result); -} - -TEST_P(CodegenTest, ComparisonsInt) { - const InstructionSet target_isa = GetParam(); - for (int64_t i = -1; i <= 1; i++) { - for (int64_t j = -1; j <= 1; j++) { - TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondB, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondA, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa); + TransformToSsa(graph); + RunCode(target_isa, graph, [](HGraph*) {}, true, expected_result); +} + +TEST_F(CodegenTest, ComparisonsInt) { + for (InstructionSet target_isa : GetTargetISAs()) { + for (int64_t i = -1; i <= 1; i++) { + for (int64_t j = -1; j <= 1; j++) { + TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondB, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondA, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa); + } } } } -TEST_P(CodegenTest, ComparisonsLong) { 
+TEST_F(CodegenTest, ComparisonsLong) { // TODO: make MIPS work for long if (kRuntimeISA == kMips || kRuntimeISA == kMips64) { return; } - const InstructionSet target_isa = GetParam(); - if (target_isa == kMips || target_isa == kMips64) { - return; - } - - for (int64_t i = -1; i <= 1; i++) { - for (int64_t j = -1; j <= 1; j++) { - TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondB, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondA, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa); + for (InstructionSet target_isa : GetTargetISAs()) { + if (target_isa == kMips || target_isa == kMips64) { + continue; } - } -} -static ::std::vector<InstructionSet> GetTargetISAs() { - ::std::vector<InstructionSet> v; - // Add all ISAs that are executable on hardware or on simulator. - const ::std::vector<InstructionSet> executable_isa_candidates = { - kArm, - kArm64, - kThumb2, - kX86, - kX86_64, - kMips, - kMips64 - }; - - for (auto target_isa : executable_isa_candidates) { - if (CanExecute(target_isa)) { - v.push_back(target_isa); + for (int64_t i = -1; i <= 1; i++) { + for (int64_t j = -1; j <= 1; j++) { + TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondB, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondA, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa); + } } } - - return v; } -INSTANTIATE_TEST_CASE_P(MultipleTargets, - CodegenTest, - ::testing::ValuesIn(GetTargetISAs())); - } // namespace art diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 86a695b152..e170e37bdd 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -89,15 +89,18 @@ void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) { } void HDeadCodeElimination::RemoveDeadBlocks() { + if (graph_->HasIrreducibleLoops()) { + // Do not eliminate dead blocks if the graph has irreducible loops. We could + // support it, but that would require changes in our loop representation to handle + // multiple entry points. We decided it was not worth the complexity. + return; + } // Classify blocks as reachable/unreachable. ArenaAllocator* allocator = graph_->GetArena(); ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false); MarkReachableBlocks(graph_, &live_blocks); bool removed_one_or_more_blocks = false; - // If the graph has irreducible loops we need to reset all graph analysis we have done - // before: the irreducible loop can be turned into a reducible one. 
- // For simplicity, we do the full computation regardless of the type of the loops. bool rerun_dominance_and_loop_analysis = false; // Remove all dead blocks. Iterate in post order because removal needs the @@ -105,9 +108,6 @@ void HDeadCodeElimination::RemoveDeadBlocks() { // inside out. for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); - if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) { - rerun_dominance_and_loop_analysis = true; - } int id = block->GetBlockId(); if (!live_blocks.IsBitSet(id)) { MaybeRecordDeadBlock(block); diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 91e4a997fd..feb8b2092a 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -133,8 +133,9 @@ TEST(OptimizerTest, CFG4) { const uint32_t dominators[] = { kInvalidBlockId, - 0, - kInvalidBlockId + 3, + kInvalidBlockId, + 0 }; TestCode(data1, dominators, sizeof(dominators) / sizeof(int)); diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 9439ba0c8d..31136772c7 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -484,6 +484,18 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { loop_information->GetPreHeader()->GetSuccessors().size())); } + if (loop_information->GetSuspendCheck() == nullptr) { + AddError(StringPrintf( + "Loop with header %d does not have a suspend check.", + loop_header->GetBlockId())); + } + + if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) { + AddError(StringPrintf( + "Loop header %d does not have the loop suspend check as the first instruction.", + loop_header->GetBlockId())); + } + // Ensure the loop header has only one incoming branch and the remaining // predecessors are back edges. size_t num_preds = loop_header->GetPredecessors().size(); @@ -589,6 +601,14 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { } } + if (instruction->NeedsEnvironment() && !instruction->HasEnvironment()) { + AddError(StringPrintf("Instruction %s:%d in block %d requires an environment " + "but does not have one.", + instruction->DebugName(), + instruction->GetId(), + current_block_->GetBlockId())); + } + // Ensure an instruction having an environment is dominated by the // instructions contained in the environment. for (HEnvironment* environment = instruction->GetEnvironment(); diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index d4b9b71952..d5305646a8 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -164,7 +164,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges1) { // Ensure there is only one back edge. ASSERT_EQ(if_block->GetPredecessors().size(), 2u); - ASSERT_EQ(if_block->GetPredecessors()[0], entry_block); + ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor()); ASSERT_NE(if_block->GetPredecessors()[1], if_block); // Ensure the new block is the back edge. @@ -199,7 +199,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges2) { // Ensure there is only one back edge. ASSERT_EQ(if_block->GetPredecessors().size(), 2u); - ASSERT_EQ(if_block->GetPredecessors()[0], entry_block); + ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor()); ASSERT_NE(if_block->GetPredecessors()[1], if_block); // Ensure the new block is the back edge. 
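
Aside on the graph_checker.cc hunks above: the SSA checker now requires every loop header to carry a suspend check and to have it as the first instruction once leading parallel moves are skipped, which is exactly what GetFirstInstructionDisregardMoves() (added in the nodes.cc hunk further down) computes. A toy model of the skipping rule, illustrative only, with a deliberately simplified instruction type:

    // Toy model, not the ART classes: skip ParallelMove instructions at
    // the head of a block, mirroring GetFirstInstructionDisregardMoves().
    #include <cassert>

    struct Instr {
      bool is_parallel_move = false;
      Instr* next = nullptr;
    };

    Instr* FirstDisregardMoves(Instr* first) {
      while (first != nullptr && first->is_parallel_move) {
        first = first->next;
      }
      return first;
    }

    int main() {
      Instr suspend_check;               // stands in for HSuspendCheck
      Instr move{true, &suspend_check};  // a leading parallel move
      // A parallel move before the suspend check is tolerated...
      assert(FirstDisregardMoves(&move) == &suspend_check);
      // ...and without moves the suspend check must already be first.
      assert(FirstDisregardMoves(&suspend_check) == &suspend_check);
      return 0;
    }
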
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 293282edbb..2e79df1b84 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -356,12 +356,12 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, compare, invoke_instruction->GetDexPc()); // TODO: Extend reference type propagation to understand the guard. if (cursor != nullptr) { - bb_cursor->InsertInstructionAfter(load_class, cursor); + bb_cursor->InsertInstructionAfter(field_get, cursor); } else { - bb_cursor->InsertInstructionBefore(load_class, bb_cursor->GetFirstInstruction()); + bb_cursor->InsertInstructionBefore(field_get, bb_cursor->GetFirstInstruction()); } - bb_cursor->InsertInstructionAfter(field_get, load_class); - bb_cursor->InsertInstructionAfter(compare, field_get); + bb_cursor->InsertInstructionAfter(load_class, field_get); + bb_cursor->InsertInstructionAfter(compare, load_class); bb_cursor->InsertInstructionAfter(deoptimize, compare); deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); @@ -419,7 +419,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits(); if (code_item->insns_size_in_code_units_ > inline_max_code_units) { VLOG(compiler) << "Method " << PrettyMethod(method) - << " is too big to inline"; + << " is too big to inline: " + << code_item->insns_size_in_code_units_ + << " > " + << inline_max_code_units; return false; } @@ -639,9 +642,12 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, for (; !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); - if (block->IsLoopHeader()) { + + if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) { + // Don't inline methods with irreducible loops, they could prevent some + // optimizations to run. VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because it contains a loop"; + << " could not be inlined because it contains an irreducible loop"; return false; } diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 6bbc751bee..4bcfc54791 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -30,6 +30,15 @@ void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstructio HInstruction* array, HInstruction* index, int access_size) { + if (kEmitCompilerReadBarrier) { + // The read barrier instrumentation does not support the + // HArm64IntermediateAddress instruction yet. + // + // TODO: Handle this case properly in the ARM64 code generator and + // re-enable this optimization; otherwise, remove this TODO. 
+ // b/26601270 + return; + } if (index->IsConstant() || (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) { // When the index is a constant all the addressing can be fitted in the diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 9f50d1814e..3bf3f7ffae 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -85,9 +85,9 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) InvokeDexCallingConventionVisitor* calling_convention_visitor) { if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) { HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been + // pruned by art::PrepareForRegisterAllocation. + DCHECK(!invoke_static_or_direct->IsStaticWithExplicitClinitCheck()); } if (invoke->GetNumberOfArguments() == 0) { diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 854d92a409..adf8734214 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -167,11 +167,7 @@ void HGraph::ClearDominanceInformation() { void HGraph::ClearLoopInformation() { SetHasIrreducibleLoops(false); for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); - if (current->IsLoopHeader()) { - current->RemoveInstruction(current->GetLoopInformation()->GetSuspendCheck()); - } - current->SetLoopInformation(nullptr); + it.Current()->SetLoopInformation(nullptr); } } @@ -180,6 +176,14 @@ void HBasicBlock::ClearDominanceInformation() { dominator_ = nullptr; } +HInstruction* HBasicBlock::GetFirstInstructionDisregardMoves() const { + HInstruction* instruction = GetFirstInstruction(); + while (instruction->IsParallelMove()) { + instruction = instruction->GetNext(); + } + return instruction; +} + void HGraph::ComputeDominanceInformation() { DCHECK(reverse_post_order_.empty()); reverse_post_order_.reserve(blocks_.size()); @@ -284,9 +288,10 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { // Make sure the loop has only one pre header. This simplifies SSA building by having // to just look at the pre header to know which locals are initialized at entry of the - // loop. + // loop. Also, don't allow the entry block to be a pre header: this simplifies inlining + // of this graph. size_t number_of_incomings = header->GetPredecessors().size() - info->NumberOfBackEdges(); - if (number_of_incomings != 1) { + if (number_of_incomings != 1 || (GetEntryBlock()->GetSingleSuccessor() == header)) { HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); AddBlock(pre_header); pre_header->AddInstruction(new (arena_) HGoto(header->GetDexPc())); @@ -457,6 +462,10 @@ void HGraph::SimplifyCFG() { } if (block->IsLoopHeader()) { SimplifyLoop(block); + } else if (!block->IsEntryBlock() && block->GetFirstInstruction()->IsSuspendCheck()) { + // We are being called by the dead code elimination pass, and what used to be + // a loop got dismantled. Just remove the suspend check.
+ block->RemoveInstruction(block->GetFirstInstruction()); } } } @@ -1829,6 +1838,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { DCHECK(GetBlocks()[0]->IsEntryBlock()); DCHECK(GetBlocks()[2]->IsExitBlock()); DCHECK(!body->IsExitBlock()); + DCHECK(!body->IsInLoop()); HInstruction* last = body->GetLastInstruction(); invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions()); @@ -1887,7 +1897,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Update the meta information surrounding blocks: // (1) the graph they are now in, // (2) the reverse post order of that graph, - // (3) the potential loop information they are now in, + // (3) their potential loop information, inner and outer, // (4) try block membership. // Note that we do not need to update catch phi inputs because they // correspond to the register file of the outer method which the inlinee @@ -1916,15 +1926,24 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* current = it.Current(); if (current != exit_block_ && current != entry_block_ && current != first) { - DCHECK(!current->IsInLoop()); DCHECK(current->GetTryCatchInformation() == nullptr); DCHECK(current->GetGraph() == this); current->SetGraph(outer_graph); outer_graph->AddBlock(current); outer_graph->reverse_post_order_[++index_of_at] = current; - if (loop_info != nullptr) { + if (!current->IsInLoop()) { current->SetLoopInformation(loop_info); - for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { + } else if (current->IsLoopHeader()) { + // Clear the information about which blocks are contained in that loop. Since the + // information is stored as a bit vector based on block ids, we have to update + // it, as those block ids were specific to the callee graph and we are now adding + // these blocks to the caller graph. + current->GetLoopInformation()->ClearAllBlocks(); + } + if (current->IsInLoop()) { + for (HLoopInformationOutwardIterator loop_it(*current); + !loop_it.Done(); + loop_it.Advance()) { loop_it.Current()->Add(current); } } @@ -1937,7 +1956,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { outer_graph->AddBlock(to); outer_graph->reverse_post_order_[++index_of_at] = to; if (loop_info != nullptr) { - to->SetLoopInformation(loop_info); + if (!to->IsInLoop()) { + to->SetLoopInformation(loop_info); + } for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { loop_it.Current()->Add(to); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 859d570b29..5246fd1f05 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -689,6 +689,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> { void Add(HBasicBlock* block); void Remove(HBasicBlock* block); + void ClearAllBlocks() { + blocks_.ClearAllBits(); + } + private: // Internal recursive implementation of `Populate`.
void PopulateRecursive(HBasicBlock* block); @@ -860,6 +864,8 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { HInstruction* GetLastPhi() const { return phis_.last_instruction_; } const HInstructionList& GetPhis() const { return phis_; } + HInstruction* GetFirstInstructionDisregardMoves() const; + void AddSuccessor(HBasicBlock* block) { successors_.push_back(block); block->predecessors_.push_back(this); @@ -3687,19 +3693,13 @@ class HInvokeStaticOrDirect : public HInvoke { DCHECK(!IsStaticWithExplicitClinitCheck()); } - HNewInstance* GetThisArgumentOfStringInit() const { - DCHECK(IsStringInit()); - size_t index = InputCount() - 1; - DCHECK(InputAt(index)->IsNewInstance()); - return InputAt(index)->AsNewInstance(); - } - - void RemoveThisArgumentOfStringInit() { + HInstruction* GetAndRemoveThisArgumentOfStringInit() { DCHECK(IsStringInit()); size_t index = InputCount() - 1; - DCHECK(InputAt(index)->IsNewInstance()); + HInstruction* input = InputAt(index); RemoveAsUserOfInput(index); inputs_.pop_back(); + return input; } // Is this a call to a static method whose declaring class has an diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index bb840eabdd..fffd00535c 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -127,7 +127,7 @@ class PassObserver : public ValueObject { timing_logger_enabled_(compiler_driver->GetDumpPasses()), timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true), disasm_info_(graph->GetArena()), - visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()), + visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()), visualizer_(visualizer_output, graph, *codegen), graph_in_bad_state_(false) { if (timing_logger_enabled_ || visualizer_enabled_) { @@ -305,30 +305,19 @@ class OptimizingCompiler FINAL : public Compiler { SHARED_REQUIRES(Locks::mutator_lock_); private: - // Whether we should run any optimization or register allocation. If false, will - // just run the code generation after the graph was built. - const bool run_optimizations_; - // Create a 'CompiledMethod' for an optimized graph. - CompiledMethod* EmitOptimized(ArenaAllocator* arena, - CodeVectorAllocator* code_allocator, - CodeGenerator* codegen, - CompilerDriver* driver) const; - - // Create a 'CompiledMethod' for a non-optimized graph. - CompiledMethod* EmitBaseline(ArenaAllocator* arena, - CodeVectorAllocator* code_allocator, - CodeGenerator* codegen, - CompilerDriver* driver) const; + CompiledMethod* Emit(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* driver) const; // Try compiling a method and return the code generator used for // compiling it. // This method: // 1) Builds the graph. Returns null if it failed to build it. - // 2) If `run_optimizations_` is set: - // 2.1) Transform the graph to SSA. Returns null if it failed. - // 2.2) Run optimizations on the graph, including register allocator. - // 3) Generate code with the `code_allocator` provided. + // 2) Transforms the graph to SSA. Returns null if it failed. + // 3) Runs optimizations on the graph, including the register allocator. + // 4) Generates code with the `code_allocator` provided.
CodeGenerator* TryCompile(ArenaAllocator* arena, CodeVectorAllocator* code_allocator, const DexFile::CodeItem* code_item, @@ -350,21 +339,19 @@ class OptimizingCompiler FINAL : public Compiler { static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */ OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) - : Compiler(driver, kMaximumCompilationTimeBeforeWarning), - run_optimizations_( - driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) {} + : Compiler(driver, kMaximumCompilationTimeBeforeWarning) {} void OptimizingCompiler::Init() { // Enable C1visualizer output. Must be done in Init() because the compiler // driver is not fully initialized when passed to the compiler's constructor. CompilerDriver* driver = GetCompilerDriver(); - const std::string cfg_file_name = driver->GetDumpCfgFileName(); + const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName(); if (!cfg_file_name.empty()) { CHECK_EQ(driver->GetThreadCount(), 1U) << "Graph visualizer requires the compiler to run single-threaded. " << "Invoke the compiler with '-j1'."; std::ios_base::openmode cfg_file_mode = - driver->GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out; + driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out; visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode)); } if (driver->GetDumpStats()) { @@ -577,17 +564,6 @@ static void RunOptimizations(HGraph* graph, AllocateRegisters(graph, codegen, pass_observer); } -// The stack map we generate must be 4-byte aligned on ARM. Since existing -// maps are generated alongside these stack maps, we must also align them. -static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) { - size_t size = vector.size(); - size_t aligned_size = RoundUp(size, 4); - for (; size < aligned_size; ++size) { - vector.push_back(0); - } - return ArrayRef<const uint8_t>(vector); -} - static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) { ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter()); codegen->EmitLinkerPatches(&linker_patches); @@ -601,10 +577,10 @@ static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) return linker_patches; } -CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena, - CodeVectorAllocator* code_allocator, - CodeGenerator* codegen, - CompilerDriver* compiler_driver) const { +CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* compiler_driver) const { ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps)); stack_map.resize(codegen->ComputeStackMapsSize()); @@ -630,39 +606,6 @@ CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena, return compiled_method; } -CompiledMethod* OptimizingCompiler::EmitBaseline( - ArenaAllocator* arena, - CodeVectorAllocator* code_allocator, - CodeGenerator* codegen, - CompilerDriver* compiler_driver) const { - ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); - - ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps)); - codegen->BuildMappingTable(&mapping_table); - ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps)); - codegen->BuildVMapTable(&vmap_table); - ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps)); 
- codegen->BuildNativeGCMap(&gc_map, *compiler_driver); - - CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( - compiler_driver, - codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(code_allocator->GetMemory()), - // Follow Quick's behavior and set the frame size to zero if it is - // considered "empty" (see the definition of - // art::CodeGenerator::HasEmptyFrame). - codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - codegen->GetFpuSpillMask(), - ArrayRef<const SrcMapElem>(), - AlignVectorSize(mapping_table), - AlignVectorSize(vmap_table), - AlignVectorSize(gc_map), - ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), - ArrayRef<const LinkerPatch>(linker_patches)); - return compiled_method; -} - CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, CodeVectorAllocator* code_allocator, const DexFile::CodeItem* code_item, @@ -775,41 +718,37 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName(); - if (run_optimizations_) { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScopeCollection handles(soa.Self()); - ScopedThreadSuspension sts(soa.Self(), kNative); - - { - PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); - GraphAnalysisResult result = graph->TryBuildingSsa(&handles); - if (result != kAnalysisSuccess) { - switch (result) { - case kAnalysisFailThrowCatchLoop: - MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop); - break; - case kAnalysisFailAmbiguousArrayOp: - MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp); - break; - case kAnalysisSuccess: - UNREACHABLE(); - } - pass_observer.SetGraphInBadState(); - return nullptr; + ScopedObjectAccess soa(Thread::Current()); + StackHandleScopeCollection handles(soa.Self()); + ScopedThreadSuspension sts(soa.Self(), kNative); + + { + PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); + GraphAnalysisResult result = graph->TryBuildingSsa(&handles); + if (result != kAnalysisSuccess) { + switch (result) { + case kAnalysisFailThrowCatchLoop: + MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop); + break; + case kAnalysisFailAmbiguousArrayOp: + MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp); + break; + case kAnalysisSuccess: + UNREACHABLE(); } + pass_observer.SetGraphInBadState(); + return nullptr; } - - RunOptimizations(graph, - codegen.get(), - compiler_driver, - compilation_stats_.get(), - dex_compilation_unit, - &pass_observer, - &handles); - codegen->CompileOptimized(code_allocator); - } else { - codegen->CompileBaseline(code_allocator); } + + RunOptimizations(graph, + codegen.get(), + compiler_driver, + compilation_stats_.get(), + dex_compilation_unit, + &pass_observer, + &handles); + codegen->Compile(code_allocator); pass_observer.DumpDisassembly(); if (kArenaAllocatorCountAllocations) { @@ -861,11 +800,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, dex_cache)); if (codegen.get() != nullptr) { MaybeRecordStat(MethodCompilationStat::kCompiled); - if (run_optimizations_) { - method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver); - } else { - method = EmitBaseline(&arena, &code_allocator, codegen.get(), compiler_driver); - } + method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver); } } else { if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { @@ -928,8 +863,6 @@ bool 
OptimizingCompiler::JitCompile(Thread* self, { // Go to native so that we don't block GC during compilation. ScopedThreadSuspension sts(self, kNative); - - DCHECK(run_optimizations_); codegen.reset( TryCompile(&arena, &code_allocator, diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 9d136f3ae6..be470ccb7d 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -504,7 +504,7 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { // This function is used to reduce the dependencies in the graph after // (from -> to) has been performed. Since we ensure there is no move with the same - // destination, (to -> X) can not be blocked while (from -> X) might still be + // destination, (to -> X) cannot be blocked while (from -> X) might still be // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is // a dependency between the two. If we update the source location from 1 to 2, we diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 2bae4bc5c8..a966b62b4f 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -72,8 +72,7 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, float_spill_slots_.reserve(kDefaultNumberOfSpillSlots); double_spill_slots_.reserve(kDefaultNumberOfSpillSlots); - static constexpr bool kIsBaseline = false; - codegen->SetupBlockedRegisters(kIsBaseline); + codegen->SetupBlockedRegisters(); physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr); physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr); // Always reserve for the current method and the graph's max out registers. diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 7494e336b1..165d09d1a5 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -422,6 +422,34 @@ bool SsaBuilder::FixAmbiguousArrayOps() { return true; } +void SsaBuilder::RemoveRedundantUninitializedStrings() { + if (GetGraph()->IsDebuggable()) { + // Do not perform the optimization, for consistency with the interpreter, + // which always allocates an object for new-instance of String. + return; + } + + for (HNewInstance* new_instance : uninitialized_strings_) { + DCHECK(new_instance->IsStringAlloc()); + + // Replace NewInstance of String with NullConstant if not used prior to + // calling StringFactory. In case of deoptimization, the interpreter is + // expected to skip the null check on the `this` argument of the StringFactory call. + if (!new_instance->HasNonEnvironmentUses()) { + new_instance->ReplaceWith(GetGraph()->GetNullConstant()); + new_instance->GetBlock()->RemoveInstruction(new_instance); + + // Remove LoadClass if not needed any more. + HLoadClass* load_class = new_instance->InputAt(0)->AsLoadClass(); + DCHECK(load_class != nullptr); + DCHECK(!load_class->NeedsAccessCheck()) << "String class is always accessible"; + if (!load_class->HasUses()) { + load_class->GetBlock()->RemoveInstruction(load_class); + } + } + } +} + GraphAnalysisResult SsaBuilder::BuildSsa() { // 1) Visit in reverse post order.
We need to have all predecessors of a block // visited (with the exception of loops) in order to create the right environment @@ -487,7 +515,15 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { // input types. dead_phi_elimimation.EliminateDeadPhis(); - // 11) Clear locals. + // 11) Step 1) replaced uses of NewInstances of String with the results of + // their corresponding StringFactory calls. Unless the String objects are used + // before they are initialized, they can be replaced with NullConstant. + // Note that this optimization is valid only if unsimplified code does not use + // the uninitialized value, because we assume execution can be deoptimized at + // any safepoint. We must therefore perform it before any other optimizations. + RemoveRedundantUninitializedStrings(); + + // 12) Clear locals. for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions()); !it.Done(); it.Advance()) { @@ -891,12 +927,21 @@ void SsaBuilder::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { if (invoke->IsStringInit()) { // This is a StringFactory call which acts as a String constructor. Its // result replaces the empty String pre-allocated by NewInstance. - HNewInstance* new_instance = invoke->GetThisArgumentOfStringInit(); - invoke->RemoveThisArgumentOfStringInit(); + HInstruction* arg_this = invoke->GetAndRemoveThisArgumentOfStringInit(); + + // Replacing the NewInstance might render it redundant. Keep a list of these + // to be visited once it is clear whether it has remaining uses. + if (arg_this->IsNewInstance()) { + uninitialized_strings_.push_back(arg_this->AsNewInstance()); + } else { + DCHECK(arg_this->IsPhi()); + // NewInstance is not the direct input of the StringFactory call. It might + // be redundant, but optimizing this case is not worth the effort. + } - // Walk over all vregs and replace any occurrence of `new_instance` with `invoke`. + // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`. for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) { - if ((*current_locals_)[vreg] == new_instance) { + if ((*current_locals_)[vreg] == arg_this) { (*current_locals_)[vreg] = invoke; } } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 28eef6a40c..ccef8ea380 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -57,6 +57,7 @@ class SsaBuilder : public HGraphVisitor { loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), + uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), locals_for_(graph->GetBlocks().size(), ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) { @@ -105,6 +106,8 @@ class SsaBuilder : public HGraphVisitor { HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type); HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget); + void RemoveRedundantUninitializedStrings(); + StackHandleScopeCollection* const handles_; // True if types of ambiguous ArrayGets have been resolved. @@ -119,6 +122,7 @@ class SsaBuilder : public HGraphVisitor { ArenaVector<HArrayGet*> ambiguous_agets_; ArenaVector<HArraySet*> ambiguous_asets_; + ArenaVector<HNewInstance*> uninitialized_strings_; // HEnvironment for each block. ArenaVector<ArenaVector<HInstruction*>> locals_for_;
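To summarize the ssa_builder.cc and ssa_builder.h changes above: an illustrative C++ sketch only, with hypothetical stand-in types rather than the real HInstruction API. The builder first reroutes every local slot that holds the uninitialized String `this` to the StringFactory result; a later pass then drops any NewInstance whose only remaining uses are environment (deoptimization) uses.

#include <vector>

struct Value {
  int non_environment_uses = 0;
};

// Mirrors the vreg walk in SsaBuilder::VisitInvokeStaticOrDirect: any local
// still holding `arg_this` now holds the factory call's result instead.
void RedirectLocals(std::vector<Value*>& locals, Value* arg_this, Value* invoke) {
  for (Value*& slot : locals) {
    if (slot == arg_this) {
      slot = invoke;
    }
  }
}

// Mirrors the test in RemoveRedundantUninitializedStrings: when only
// environment uses remain, a null constant can stand in for the
// pre-allocated String, since the interpreter skips the null check on the
// StringFactory `this` argument after a deoptimization.
bool IsRedundantUninitializedString(const Value& new_instance) {
  return new_instance.non_environment_uses == 0;
}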