Diffstat (limited to 'compiler/optimizing')
39 files changed, 1998 insertions, 734 deletions
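Before the per-file hunks, a condensed illustration of the main mechanism this change introduces may help: native-debuggable compilations now record an extra stack map at the start of every basic block and at every slow-path entry, padding with a nop when the previous instruction already produced a map at the same native PC. The sketch below is a self-contained toy model assembled from the code_generator.cc/.h hunks that follow; ToyCodeGenerator, ToySlowPath and their members are illustrative stand-ins, not ART types.

```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Toy model of the native-debug-info flow added by this change (not ART code;
// real types such as CodeGenerator, HInstruction and StackMapStream are elided).
static constexpr uint32_t kNoDexPc = static_cast<uint32_t>(-1);

struct ToySlowPath {
  uint32_t dex_pc;  // taken from the slow path's triggering instruction
  uint32_t GetDexPc() const { return dex_pc; }
};

struct ToyCodeGenerator {
  bool native_debuggable = true;
  uint32_t code_size = 0;                                 // current native PC
  std::vector<std::pair<uint32_t, uint32_t>> stack_maps;  // (native_pc, dex_pc)

  bool HasStackMapAtCurrentPc() const {
    return !stack_maps.empty() && stack_maps.back().first == code_size;
  }
  void GenerateNop() { code_size += 1; }  // backend-specific virtual in ART
  void RecordPcInfo(uint32_t dex_pc) { stack_maps.push_back({code_size, dex_pc}); }

  // Mirrors CodeGenerator::MaybeRecordNativeDebugInfo in the hunk below.
  void MaybeRecordNativeDebugInfo(uint32_t dex_pc) {
    if (native_debuggable && dex_pc != kNoDexPc) {
      if (HasStackMapAtCurrentPc()) {
        GenerateNop();  // do not let two stack maps share one native PC
      }
      RecordPcInfo(dex_pc);
    }
  }

  // Mirrors the new calls in Compile() and GenerateSlowPaths(): one entry per
  // basic-block start and one per slow-path entry, keyed by the dex pc of the
  // instruction that created the slow path.
  void EmitSlowPath(ToySlowPath* slow_path) {
    MaybeRecordNativeDebugInfo(slow_path->GetDexPc());
    // ... the slow path's native code would be emitted here ...
  }
};
```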
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 05e1356ed8..35ec7d41ff 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -368,7 +368,6 @@ GraphAnalysisResult HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item if (native_debuggable) { const uint32_t num_instructions = code_item.insns_size_in_code_units_; native_debug_info_locations = new (arena_) ArenaBitVector (arena_, num_instructions, false); - native_debug_info_locations->ClearAllBits(); FindNativeDebugInfoLocations(code_item, native_debug_info_locations); } @@ -443,23 +442,15 @@ void HGraphBuilder::FindNativeDebugInfoLocations(const DexFile::CodeItem& code_i } }; dex_file_->DecodeDebugPositionInfo(&code_item, Callback::Position, locations); - // Add native debug info at the start of every basic block. - for (uint32_t pc = 0; pc < code_item.insns_size_in_code_units_; pc++) { - if (FindBlockStartingAt(pc) != nullptr) { - locations->SetBit(pc); - } - } // Instruction-specific tweaks. const Instruction* const begin = Instruction::At(code_item.insns_); const Instruction* const end = begin->RelativeAt(code_item.insns_size_in_code_units_); for (const Instruction* inst = begin; inst < end; inst = inst->Next()) { switch (inst->Opcode()) { - case Instruction::MOVE_EXCEPTION: - case Instruction::MOVE_RESULT: - case Instruction::MOVE_RESULT_WIDE: - case Instruction::MOVE_RESULT_OBJECT: { - // The compiler checks that there are no instructions before those. - // So generate HNativeDebugInfo after them instead. + case Instruction::MOVE_EXCEPTION: { + // Stop in native debugger after the exception has been moved. + // The compiler also expects the move at the start of basic block so + // we do not want to interfere by inserting native-debug-info before it. locations->ClearBit(inst->GetDexPc(code_item.insns_)); const Instruction* next = inst->Next(); if (next < end) { diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index c2c8ccfc56..967d156cf6 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -195,6 +195,8 @@ void CodeGenerator::GenerateSlowPaths() { if (disasm_info_ != nullptr) { code_start = GetAssembler()->CodeSize(); } + // Record the dex pc at start of slow path (required for java line number mapping). + MaybeRecordNativeDebugInfo(nullptr /* instruction */, slow_path->GetDexPc()); slow_path->EmitNativeCode(this); if (disasm_info_ != nullptr) { disasm_info_->AddSlowPathInterval(slow_path, code_start, GetAssembler()->CodeSize()); @@ -226,6 +228,10 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { // errors where we reference that label. if (block->IsSingleJump()) continue; Bind(block); + // This ensures that we have correct native line mapping for all native instructions. + // It is necessary to make stepping over a statement work. Otherwise, any initial + // instructions (e.g. moves) would be assumed to be the start of next statement. + MaybeRecordNativeDebugInfo(nullptr /* instruction */, block->GetDexPc()); for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); DisassemblyScope disassembly_scope(current, *this); @@ -733,7 +739,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t native_pc = GetAssembler()->CodeSize(); if (instruction == nullptr) { - // For stack overflow checks. + // For stack overflow checks and native-debug-info entries without dex register + // mapping (i.e. 
start of basic block or start of slow path). stack_map_stream_.BeginStackMapEntry(outer_dex_pc, native_pc, 0, 0, 0, 0); stack_map_stream_.EndStackMapEntry(); return; @@ -808,6 +815,16 @@ bool CodeGenerator::HasStackMapAtCurrentPc() { return count > 0 && stack_map_stream_.GetStackMap(count - 1).native_pc_offset == pc; } +void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction, uint32_t dex_pc) { + if (GetCompilerOptions().GetNativeDebuggable() && dex_pc != kNoDexPc) { + if (HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + GenerateNop(); + } + RecordPcInfo(instruction, dex_pc); + } +} + void CodeGenerator::RecordCatchBlockInfo() { ArenaAllocator* arena = graph_->GetArena(); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 49c193e7bf..9297fc956f 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -69,7 +69,7 @@ class CodeAllocator { class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> { public: - SlowPathCode() { + explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) { for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) { saved_core_stack_offsets_[i] = kRegisterNotSaved; saved_fpu_stack_offsets_[i] = kRegisterNotSaved; @@ -106,9 +106,15 @@ class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> { Label* GetEntryLabel() { return &entry_label_; } Label* GetExitLabel() { return &exit_label_; } + uint32_t GetDexPc() const { + return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc; + } + protected: static constexpr size_t kMaximumNumberOfExpectedRegisters = 32; static constexpr uint32_t kRegisterNotSaved = -1; + // The instruction where this slow path is happening. + HInstruction* instruction_; uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters]; uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters]; @@ -267,6 +273,8 @@ class CodeGenerator { void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); // Check whether we have already recorded mapping at this PC. bool HasStackMapAtCurrentPc(); + // Record extra stack maps if we support native debugging. + void MaybeRecordNativeDebugInfo(HInstruction* instruction, uint32_t dex_pc); bool CanMoveNullCheckToUser(HNullCheck* null_check); void MaybeRecordImplicitNullCheck(HInstruction* instruction); @@ -440,6 +448,8 @@ class CodeGenerator { // Copy the result of a call into the given target. virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0; + virtual void GenerateNop() = 0; + protected: // Method patch info used for recording locations of required linker patches and // target methods. 
The target method can be used for various purposes, whether for diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 87f52c6f21..10d3426a58 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -64,7 +64,7 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; class NullCheckSlowPathARM : public SlowPathCode { public: - explicit NullCheckSlowPathARM(HNullCheck* instruction) : instruction_(instruction) {} + explicit NullCheckSlowPathARM(HNullCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); @@ -83,13 +83,12 @@ class NullCheckSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM"; } private: - HNullCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM); }; class DivZeroCheckSlowPathARM : public SlowPathCode { public: - explicit DivZeroCheckSlowPathARM(HDivZeroCheck* instruction) : instruction_(instruction) {} + explicit DivZeroCheckSlowPathARM(HDivZeroCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); @@ -108,14 +107,13 @@ class DivZeroCheckSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM"; } private: - HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM); }; class SuspendCheckSlowPathARM : public SlowPathCode { public: SuspendCheckSlowPathARM(HSuspendCheck* instruction, HBasicBlock* successor) - : instruction_(instruction), successor_(successor) {} + : SlowPathCode(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); @@ -144,7 +142,6 @@ class SuspendCheckSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM"; } private: - HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. 
HBasicBlock* const successor_; @@ -157,7 +154,7 @@ class SuspendCheckSlowPathARM : public SlowPathCode { class BoundsCheckSlowPathARM : public SlowPathCode { public: explicit BoundsCheckSlowPathARM(HBoundsCheck* instruction) - : instruction_(instruction) {} + : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); @@ -188,8 +185,6 @@ class BoundsCheckSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM"; } private: - HBoundsCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM); }; @@ -199,7 +194,7 @@ class LoadClassSlowPathARM : public SlowPathCode { HInstruction* at, uint32_t dex_pc, bool do_clinit) - : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } @@ -253,7 +248,7 @@ class LoadClassSlowPathARM : public SlowPathCode { class LoadStringSlowPathARM : public SlowPathCode { public: - explicit LoadStringSlowPathARM(HLoadString* instruction) : instruction_(instruction) {} + explicit LoadStringSlowPathARM(HLoadString* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -264,7 +259,8 @@ class LoadStringSlowPathARM : public SlowPathCode { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ LoadImmediate(calling_convention.GetRegisterAt(0), string_index); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); @@ -277,15 +273,13 @@ class LoadStringSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM"; } private: - HLoadString* const instruction_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM); }; class TypeCheckSlowPathARM : public SlowPathCode { public: TypeCheckSlowPathARM(HInstruction* instruction, bool is_fatal) - : instruction_(instruction), is_fatal_(is_fatal) {} + : SlowPathCode(instruction), is_fatal_(is_fatal) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -340,7 +334,6 @@ class TypeCheckSlowPathARM : public SlowPathCode { bool IsFatal() const OVERRIDE { return is_fatal_; } private: - HInstruction* const instruction_; const bool is_fatal_; DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM); @@ -349,7 +342,7 @@ class TypeCheckSlowPathARM : public SlowPathCode { class DeoptimizationSlowPathARM : public SlowPathCode { public: explicit DeoptimizationSlowPathARM(HDeoptimize* instruction) - : instruction_(instruction) {} + : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); @@ -365,13 +358,12 @@ class DeoptimizationSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; } private: - HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM); }; class ArraySetSlowPathARM : public SlowPathCode { public: - explicit 
ArraySetSlowPathARM(HInstruction* instruction) : instruction_(instruction) {} + explicit ArraySetSlowPathARM(HInstruction* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -410,8 +402,6 @@ class ArraySetSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM"; } private: - HInstruction* const instruction_; - DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; @@ -419,7 +409,7 @@ class ArraySetSlowPathARM : public SlowPathCode { class ReadBarrierMarkSlowPathARM : public SlowPathCode { public: ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location out, Location obj) - : instruction_(instruction), out_(out), obj_(obj) { + : SlowPathCode(instruction), out_(out), obj_(obj) { DCHECK(kEmitCompilerReadBarrier); } @@ -458,7 +448,6 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { } private: - HInstruction* const instruction_; const Location out_; const Location obj_; @@ -474,7 +463,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { Location obj, uint32_t offset, Location index) - : instruction_(instruction), + : SlowPathCode(instruction), out_(out), ref_(ref), obj_(obj), @@ -629,7 +618,6 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { UNREACHABLE(); } - HInstruction* const instruction_; const Location out_; const Location ref_; const Location obj_; @@ -646,7 +634,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { class ReadBarrierForRootSlowPathARM : public SlowPathCode { public: ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) { + : SlowPathCode(instruction), out_(out), root_(root) { DCHECK(kEmitCompilerReadBarrier); } @@ -679,7 +667,6 @@ class ReadBarrierForRootSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM"; } private: - HInstruction* const instruction_; const Location out_; const Location root_; @@ -1557,11 +1544,11 @@ void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo* info) { - if (codegen_->HasStackMapAtCurrentPc()) { - // Ensure that we do not collide with the stack map of the previous instruction. 
- __ nop(); - } - codegen_->RecordPcInfo(info, info->GetDexPc()); + codegen_->MaybeRecordNativeDebugInfo(info, info->GetDexPc()); +} + +void CodeGeneratorARM::GenerateNop() { + __ nop(); } void LocationsBuilderARM::HandleCondition(HCondition* cond) { @@ -6426,6 +6413,33 @@ Literal* CodeGeneratorARM::DeduplicateMethodCodeLiteral(MethodReference target_m return DeduplicateMethodLiteral(target_method, &call_patches_); } +void LocationsBuilderARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); + locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex, + Location::RequiresRegister()); + locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); + locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { + LocationSummary* locations = instr->GetLocations(); + Register res = locations->Out().AsRegister<Register>(); + Register accumulator = + locations->InAt(HMultiplyAccumulate::kInputAccumulatorIndex).AsRegister<Register>(); + Register mul_left = + locations->InAt(HMultiplyAccumulate::kInputMulLeftIndex).AsRegister<Register>(); + Register mul_right = + locations->InAt(HMultiplyAccumulate::kInputMulRightIndex).AsRegister<Register>(); + + if (instr->GetOpKind() == HInstruction::kAdd) { + __ mla(res, mul_left, mul_right, accumulator); + } else { + __ mls(res, mul_left, mul_right, accumulator); + } +} + void LocationsBuilderARM::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index cfd7a3bc14..06e7c0015c 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -159,6 +159,7 @@ class LocationsBuilderARM : public HGraphVisitor { FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION @@ -197,6 +198,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION @@ -510,6 +512,8 @@ class CodeGeneratorARM : public CodeGenerator { // artReadBarrierForRootSlow. void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); + void GenerateNop(); + private: // Factored implementation of GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. 
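The ARM hunk above lowers the now-shared HMultiplyAccumulate node to mla/mls. As a reference for the semantics, which the diff itself does not spell out, the scalar equivalents are sketched below; the function names are illustrative, not ART APIs.

```cpp
#include <cstdint>

// Scalar equivalents of the ARM instructions chosen in VisitMultiplyAccumulate:
//   kAdd -> mla res, mul_left, mul_right, accumulator
//   kSub -> mls res, mul_left, mul_right, accumulator
int32_t MlaEquivalent(int32_t acc, int32_t left, int32_t right) {
  return acc + left * right;  // multiply-accumulate
}
int32_t MlsEquivalent(int32_t acc, int32_t left, int32_t right) {
  return acc - left * right;  // multiply-subtract
}
```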
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 435ae5e954..25487d2fad 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -219,7 +219,7 @@ void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSum class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : instruction_(instruction) {} + explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -246,14 +246,12 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; } private: - HBoundsCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64); }; class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : instruction_(instruction) {} + explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); @@ -272,7 +270,6 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; } private: - HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64); }; @@ -282,7 +279,7 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { HInstruction* at, uint32_t dex_pc, bool do_clinit) - : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + : SlowPathCodeARM64(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } @@ -337,7 +334,7 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit LoadStringSlowPathARM64(HLoadString* instruction) : instruction_(instruction) {} + explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -348,7 +345,8 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - __ Mov(calling_convention.GetRegisterAt(0).W(), instruction_->GetStringIndex()); + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ Mov(calling_convention.GetRegisterAt(0).W(), string_index); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); @@ -362,14 +360,12 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; } private: - HLoadString* const instruction_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64); }; class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit NullCheckSlowPathARM64(HNullCheck* instr) : instruction_(instr) {} + explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); @@ -388,15 +384,13 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; } private: - HNullCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64); }; class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { public: SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor) - : instruction_(instruction), successor_(successor) {} + : SlowPathCodeARM64(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); @@ -425,7 +419,6 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; } private: - HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. HBasicBlock* const successor_; @@ -438,7 +431,7 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { public: TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal) - : instruction_(instruction), is_fatal_(is_fatal) {} + : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -487,7 +480,6 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { bool IsFatal() const { return is_fatal_; } private: - HInstruction* const instruction_; const bool is_fatal_; DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64); @@ -496,7 +488,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { public: explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction) - : instruction_(instruction) {} + : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); @@ -512,13 +504,12 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } private: - HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64); }; class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit ArraySetSlowPathARM64(HInstruction* instruction) : instruction_(instruction) {} + explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -557,8 +548,6 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; } private: - HInstruction* const instruction_; - DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64); }; @@ -588,7 +577,7 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location out, Location obj) - : instruction_(instruction), out_(out), obj_(obj) { + : SlowPathCodeARM64(instruction), out_(out), obj_(obj) { DCHECK(kEmitCompilerReadBarrier); } @@ -627,7 +616,6 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { } private: - HInstruction* const instruction_; const 
Location out_; const Location obj_; @@ -643,7 +631,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { Location obj, uint32_t offset, Location index) - : instruction_(instruction), + : SlowPathCodeARM64(instruction), out_(out), ref_(ref), obj_(obj), @@ -804,7 +792,6 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { UNREACHABLE(); } - HInstruction* const instruction_; const Location out_; const Location ref_; const Location obj_; @@ -821,7 +808,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) { + : SlowPathCodeARM64(instruction), out_(out), root_(root) { DCHECK(kEmitCompilerReadBarrier); } @@ -865,7 +852,6 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; } private: - HInstruction* const instruction_; const Location out_; const Location root_; @@ -1876,6 +1862,36 @@ void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) { HandleBinaryOp(instruction); } +void LocationsBuilderARM64::VisitArm64BitwiseNegatedRight(HArm64BitwiseNegatedRight* instr) { + DCHECK(Primitive::IsIntegralType(instr->GetType())) << instr->GetType(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); + locations->SetInAt(0, Location::RequiresRegister()); + // There is no immediate variant of negated bitwise instructions in AArch64. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitArm64BitwiseNegatedRight( + HArm64BitwiseNegatedRight* instr) { + Register dst = OutputRegister(instr); + Register lhs = InputRegisterAt(instr, 0); + Register rhs = InputRegisterAt(instr, 1); + + switch (instr->GetOpKind()) { + case HInstruction::kAnd: + __ Bic(dst, lhs, rhs); + break; + case HInstruction::kOr: + __ Orn(dst, lhs, rhs); + break; + case HInstruction::kXor: + __ Eon(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unreachable"; + } +} + void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp( HArm64DataProcWithShifterOp* instruction) { DCHECK(instruction->GetType() == Primitive::kPrimInt || @@ -1973,21 +1989,27 @@ void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress( Operand(InputOperandAt(instruction, 1))); } -void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { +void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); - locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex, - Location::RequiresRegister()); - locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); - locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); + HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); + if (instr->GetOpKind() == HInstruction::kSub && + accumulator->IsConstant() && + accumulator->AsConstant()->IsZero()) { + // Don't allocate register for Mneg instruction. 
+ } else { + locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex, + Location::RequiresRegister()); + } + locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); + locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } -void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { +void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { Register res = OutputRegister(instr); - Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex); - Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex); - Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex); + Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex); + Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex); // Avoid emitting code that could trigger Cortex A53's erratum 835769. // This fixup should be carried out for all multiply-accumulate instructions: @@ -2007,10 +2029,17 @@ void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyA } if (instr->GetOpKind() == HInstruction::kAdd) { + Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex); __ Madd(res, mul_left, mul_right, accumulator); } else { DCHECK(instr->GetOpKind() == HInstruction::kSub); - __ Msub(res, mul_left, mul_right, accumulator); + HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); + if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsZero()) { + __ Mneg(res, mul_left, mul_right); + } else { + Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex); + __ Msub(res, mul_left, mul_right, accumulator); + } } } @@ -3057,11 +3086,11 @@ void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { - if (codegen_->HasStackMapAtCurrentPc()) { - // Ensure that we do not collide with the stack map of the previous instruction. 
- __ Nop(); - } - codegen_->RecordPcInfo(info, info->GetDexPc()); + codegen_->MaybeRecordNativeDebugInfo(info, info->GetDexPc()); +} + +void CodeGeneratorARM64::GenerateNop() { + __ Nop(); } void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 360488eb4a..10f1e7f008 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -66,7 +66,8 @@ Location ARM64ReturnLocation(Primitive::Type return_type); class SlowPathCodeARM64 : public SlowPathCode { public: - SlowPathCodeARM64() : entry_label_(), exit_label_() {} + explicit SlowPathCodeARM64(HInstruction* instruction) + : SlowPathCode(instruction), entry_label_(), exit_label_() {} vixl::Label* GetEntryLabel() { return &entry_label_; } vixl::Label* GetExitLabel() { return &exit_label_; } @@ -195,6 +196,7 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION @@ -284,6 +286,7 @@ class LocationsBuilderARM64 : public HGraphVisitor { FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION @@ -532,6 +535,8 @@ class CodeGeneratorARM64 : public CodeGenerator { // artReadBarrierForRootSlow. void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); + void GenerateNop(); + private: // Factored implementation of GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index c500ea4408..8d3d94b79d 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -39,9 +39,6 @@ namespace mips { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = A0; -// We need extra temporary/scratch registers (in addition to AT) in some cases. 
-static constexpr FRegister FTMP = F8; - Location MipsReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: @@ -149,7 +146,7 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type) class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit BoundsCheckSlowPathMIPS(HBoundsCheck* instruction) : instruction_(instruction) {} + explicit BoundsCheckSlowPathMIPS(HBoundsCheck* instruction) : SlowPathCodeMIPS(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -181,14 +178,12 @@ class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS"; } private: - HBoundsCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS); }; class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit DivZeroCheckSlowPathMIPS(HDivZeroCheck* instruction) : instruction_(instruction) {} + explicit DivZeroCheckSlowPathMIPS(HDivZeroCheck* instruction) : SlowPathCodeMIPS(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); @@ -210,7 +205,6 @@ class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS"; } private: - HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS); }; @@ -220,7 +214,7 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { HInstruction* at, uint32_t dex_pc, bool do_clinit) - : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + : SlowPathCodeMIPS(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } @@ -279,7 +273,7 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit LoadStringSlowPathMIPS(HLoadString* instruction) : instruction_(instruction) {} + explicit LoadStringSlowPathMIPS(HLoadString* instruction) : SlowPathCodeMIPS(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -290,7 +284,8 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - __ LoadConst32(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ LoadConst32(calling_convention.GetRegisterAt(0), string_index); mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), @@ -309,14 +304,12 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS"; } private: - HLoadString* const instruction_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS); }; class NullCheckSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit NullCheckSlowPathMIPS(HNullCheck* instr) : instruction_(instr) {} + explicit NullCheckSlowPathMIPS(HNullCheck* instr) : SlowPathCodeMIPS(instr) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); @@ -338,15 +331,13 @@ class NullCheckSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { 
return "NullCheckSlowPathMIPS"; } private: - HNullCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS); }; class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS { public: SuspendCheckSlowPathMIPS(HSuspendCheck* instruction, HBasicBlock* successor) - : instruction_(instruction), successor_(successor) {} + : SlowPathCodeMIPS(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); @@ -374,7 +365,6 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS"; } private: - HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. HBasicBlock* const successor_; @@ -386,7 +376,7 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS { class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit TypeCheckSlowPathMIPS(HInstruction* instruction) : instruction_(instruction) {} + explicit TypeCheckSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -437,15 +427,13 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS"; } private: - HInstruction* const instruction_; - DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS); }; class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { public: explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction) - : instruction_(instruction) {} + : SlowPathCodeMIPS(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); @@ -462,7 +450,6 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; } private: - HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS); }; @@ -3407,11 +3394,11 @@ void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { - if (codegen_->HasStackMapAtCurrentPc()) { - // Ensure that we do not collide with the stack map of the previous instruction. 
- __ Nop(); - } - codegen_->RecordPcInfo(info, info->GetDexPc()); + codegen_->MaybeRecordNativeDebugInfo(info, info->GetDexPc()); +} + +void CodeGeneratorMIPS::GenerateNop() { + __ Nop(); } void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index dd0641c7ca..605c794421 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -152,7 +152,8 @@ class ParallelMoveResolverMIPS : public ParallelMoveResolverWithSwap { class SlowPathCodeMIPS : public SlowPathCode { public: - SlowPathCodeMIPS() : entry_label_(), exit_label_() {} + explicit SlowPathCodeMIPS(HInstruction* instruction) + : SlowPathCode(instruction), entry_label_(), exit_label_() {} MipsLabel* GetEntryLabel() { return &entry_label_; } MipsLabel* GetExitLabel() { return &exit_label_; } @@ -360,6 +361,8 @@ class CodeGeneratorMIPS : public CodeGenerator { UNIMPLEMENTED(FATAL) << "Not implemented on MIPS"; } + void GenerateNop(); + private: // Labels for each block that will be compiled. MipsLabel* block_labels_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index e3a44f1c96..c2b84b4335 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -37,9 +37,6 @@ namespace mips64 { static constexpr int kCurrentMethodStackOffset = 0; static constexpr GpuRegister kMethodRegisterArgument = A0; -// We need extra temporary/scratch registers (in addition to AT) in some cases. -static constexpr FpuRegister FTMP = F8; - Location Mips64ReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: @@ -110,7 +107,7 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type) class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction) : instruction_(instruction) {} + explicit BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction) : SlowPathCodeMIPS64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -141,14 +138,12 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS64"; } private: - HBoundsCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS64); }; class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : instruction_(instruction) {} + explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : SlowPathCodeMIPS64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); @@ -169,7 +164,6 @@ class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS64"; } private: - HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS64); }; @@ -179,7 +173,7 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { HInstruction* at, uint32_t dex_pc, bool do_clinit) - : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + : SlowPathCodeMIPS64(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || 
at->IsClinitCheck()); } @@ -234,7 +228,7 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit LoadStringSlowPathMIPS64(HLoadString* instruction) : instruction_(instruction) {} + explicit LoadStringSlowPathMIPS64(HLoadString* instruction) : SlowPathCodeMIPS64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -245,7 +239,8 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - __ LoadConst32(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ LoadConst32(calling_convention.GetRegisterAt(0), string_index); mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), @@ -263,14 +258,12 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; } private: - HLoadString* const instruction_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS64); }; class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit NullCheckSlowPathMIPS64(HNullCheck* instr) : instruction_(instr) {} + explicit NullCheckSlowPathMIPS64(HNullCheck* instr) : SlowPathCodeMIPS64(instr) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); @@ -291,15 +284,13 @@ class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathMIPS64"; } private: - HNullCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS64); }; class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: SuspendCheckSlowPathMIPS64(HSuspendCheck* instruction, HBasicBlock* successor) - : instruction_(instruction), successor_(successor) {} + : SlowPathCodeMIPS64(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); @@ -326,7 +317,6 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS64"; } private: - HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. 
HBasicBlock* const successor_; @@ -338,7 +328,7 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit TypeCheckSlowPathMIPS64(HInstruction* instruction) : instruction_(instruction) {} + explicit TypeCheckSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -384,15 +374,13 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; } private: - HInstruction* const instruction_; - DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS64); }; class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction) - : instruction_(instruction) {} + : SlowPathCodeMIPS64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); @@ -408,7 +396,6 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } private: - HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64); }; @@ -2732,11 +2719,11 @@ void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { - if (codegen_->HasStackMapAtCurrentPc()) { - // Ensure that we do not collide with the stack map of the previous instruction. - __ Nop(); - } - codegen_->RecordPcInfo(info, info->GetDexPc()); + codegen_->MaybeRecordNativeDebugInfo(info, info->GetDexPc()); +} + +void CodeGeneratorMIPS64::GenerateNop() { + __ Nop(); } void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index eb7315aa7a..ba9eaff46f 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -152,7 +152,8 @@ class ParallelMoveResolverMIPS64 : public ParallelMoveResolverWithSwap { class SlowPathCodeMIPS64 : public SlowPathCode { public: - SlowPathCodeMIPS64() : entry_label_(), exit_label_() {} + explicit SlowPathCodeMIPS64(HInstruction* instruction) + : SlowPathCode(instruction), entry_label_(), exit_label_() {} Mips64Label* GetEntryLabel() { return &entry_label_; } Mips64Label* GetExitLabel() { return &exit_label_; } @@ -352,6 +353,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator { UNIMPLEMENTED(FATAL) << "Not implemented on MIPS64"; } + void GenerateNop(); + private: // Labels for each block that will be compiled. Mips64Label* block_labels_; // Indexed by block id. 
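The arm64 hunks earlier in this diff add two instruction-selection cases whose semantics are easy to misread: HArm64BitwiseNegatedRight maps kAnd/kOr/kXor onto Bic/Orn/Eon, and a multiply-accumulate whose accumulator is the constant zero is emitted as Mneg instead of Msub, which is why no register is allocated for the accumulator in that case. A small scalar restatement follows, with illustrative names only.

```cpp
#include <cstdint>

// Scalar equivalents of the AArch64 instructions selected above:
//   kAnd -> Bic dst, lhs, rhs    dst = lhs & ~rhs
//   kOr  -> Orn dst, lhs, rhs    dst = lhs | ~rhs
//   kXor -> Eon dst, lhs, rhs    dst = lhs ^ ~rhs
uint64_t BicEquivalent(uint64_t lhs, uint64_t rhs) { return lhs & ~rhs; }
uint64_t OrnEquivalent(uint64_t lhs, uint64_t rhs) { return lhs | ~rhs; }
uint64_t EonEquivalent(uint64_t lhs, uint64_t rhs) { return lhs ^ ~rhs; }

// Multiply-accumulate lowering: Madd/Msub take an accumulator register, while
// Mneg(left, right) == 0 - left * right, so a literal zero accumulator needs
// no register at all.
int64_t MaddEquivalent(int64_t acc, int64_t l, int64_t r) { return acc + l * r; }
int64_t MsubEquivalent(int64_t acc, int64_t l, int64_t r) { return acc - l * r; }
int64_t MnegEquivalent(int64_t l, int64_t r) { return -(l * r); }
```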
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index f032f51649..88e42f3faf 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -52,7 +52,7 @@ static constexpr int kFakeReturnRegister = Register(8); class NullCheckSlowPathX86 : public SlowPathCode { public: - explicit NullCheckSlowPathX86(HNullCheck* instruction) : instruction_(instruction) {} + explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); @@ -73,13 +73,12 @@ class NullCheckSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86"; } private: - HNullCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86); }; class DivZeroCheckSlowPathX86 : public SlowPathCode { public: - explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : instruction_(instruction) {} + explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); @@ -100,13 +99,13 @@ class DivZeroCheckSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86"; } private: - HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86); }; class DivRemMinusOneSlowPathX86 : public SlowPathCode { public: - DivRemMinusOneSlowPathX86(Register reg, bool is_div) : reg_(reg), is_div_(is_div) {} + DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div) + : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); @@ -128,7 +127,7 @@ class DivRemMinusOneSlowPathX86 : public SlowPathCode { class BoundsCheckSlowPathX86 : public SlowPathCode { public: - explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : instruction_(instruction) {} + explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -160,15 +159,13 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86"; } private: - HBoundsCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86); }; class SuspendCheckSlowPathX86 : public SlowPathCode { public: SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor) - : instruction_(instruction), successor_(successor) {} + : SlowPathCode(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); @@ -199,7 +196,6 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86"; } private: - HSuspendCheck* const instruction_; HBasicBlock* const successor_; Label return_label_; @@ -208,7 +204,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { class LoadStringSlowPathX86 : public SlowPathCode { public: - explicit LoadStringSlowPathX86(HLoadString* instruction) : instruction_(instruction) {} + explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {} 
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -219,7 +215,8 @@ class LoadStringSlowPathX86 : public SlowPathCode { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction_->GetStringIndex())); + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index)); x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), @@ -234,8 +231,6 @@ class LoadStringSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86"; } private: - HLoadString* const instruction_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86); }; @@ -245,7 +240,7 @@ class LoadClassSlowPathX86 : public SlowPathCode { HInstruction* at, uint32_t dex_pc, bool do_clinit) - : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } @@ -299,7 +294,7 @@ class LoadClassSlowPathX86 : public SlowPathCode { class TypeCheckSlowPathX86 : public SlowPathCode { public: TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal) - : instruction_(instruction), is_fatal_(is_fatal) {} + : SlowPathCode(instruction), is_fatal_(is_fatal) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -356,7 +351,6 @@ class TypeCheckSlowPathX86 : public SlowPathCode { bool IsFatal() const OVERRIDE { return is_fatal_; } private: - HInstruction* const instruction_; const bool is_fatal_; DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86); @@ -365,7 +359,7 @@ class TypeCheckSlowPathX86 : public SlowPathCode { class DeoptimizationSlowPathX86 : public SlowPathCode { public: explicit DeoptimizationSlowPathX86(HDeoptimize* instruction) - : instruction_(instruction) {} + : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); @@ -381,13 +375,12 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } private: - HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86); }; class ArraySetSlowPathX86 : public SlowPathCode { public: - explicit ArraySetSlowPathX86(HInstruction* instruction) : instruction_(instruction) {} + explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -426,8 +419,6 @@ class ArraySetSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86"; } private: - HInstruction* const instruction_; - DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86); }; @@ -435,7 +426,7 @@ class ArraySetSlowPathX86 : public SlowPathCode { class ReadBarrierMarkSlowPathX86 : public SlowPathCode { public: ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location out, Location obj) - : instruction_(instruction), out_(out), obj_(obj) { + : SlowPathCode(instruction), out_(out), obj_(obj) { DCHECK(kEmitCompilerReadBarrier); } @@ -474,7 +465,6 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { } private: - 
HInstruction* const instruction_; const Location out_; const Location obj_; @@ -490,7 +480,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { Location obj, uint32_t offset, Location index) - : instruction_(instruction), + : SlowPathCode(instruction), out_(out), ref_(ref), obj_(obj), @@ -645,7 +635,6 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { UNREACHABLE(); } - HInstruction* const instruction_; const Location out_; const Location ref_; const Location obj_; @@ -662,7 +651,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { class ReadBarrierForRootSlowPathX86 : public SlowPathCode { public: ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) { + : SlowPathCode(instruction), out_(out), root_(root) { DCHECK(kEmitCompilerReadBarrier); } @@ -695,7 +684,6 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86"; } private: - HInstruction* const instruction_; const Location out_; const Location root_; @@ -1649,11 +1637,11 @@ void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { - if (codegen_->HasStackMapAtCurrentPc()) { - // Ensure that we do not collide with the stack map of the previous instruction. - __ nop(); - } - codegen_->RecordPcInfo(info, info->GetDexPc()); + codegen_->MaybeRecordNativeDebugInfo(info, info->GetDexPc()); +} + +void CodeGeneratorX86::GenerateNop() { + __ nop(); } void LocationsBuilderX86::VisitLocal(HLocal* local) { @@ -3453,9 +3441,8 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr GenerateDivRemWithAnyConstant(instruction); } } else { - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(), - is_div); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86( + instruction, out.AsRegister<Register>(), is_div); codegen_->AddSlowPath(slow_path); Register second_reg = second.AsRegister<Register>(); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 63e9b2fc9c..0795f3b530 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -540,6 +540,7 @@ class CodeGeneratorX86 : public CodeGenerator { } } + void GenerateNop(); private: // Factored implementation of GenerateFieldLoadWithBakerReadBarrier diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index f3c40b109f..bb24c6f59c 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -56,7 +56,7 @@ static constexpr int kC2ConditionMask = 0x400; class NullCheckSlowPathX86_64 : public SlowPathCode { public: - explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {} + explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); @@ -77,13 +77,12 @@ class NullCheckSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; } private: - HNullCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64); }; 
class DivZeroCheckSlowPathX86_64 : public SlowPathCode { public: - explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : instruction_(instruction) {} + explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); @@ -104,14 +103,13 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; } private: - HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64); }; class DivRemMinusOneSlowPathX86_64 : public SlowPathCode { public: - DivRemMinusOneSlowPathX86_64(Register reg, Primitive::Type type, bool is_div) - : cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {} + DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div) + : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); @@ -145,7 +143,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCode { class SuspendCheckSlowPathX86_64 : public SlowPathCode { public: SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor) - : instruction_(instruction), successor_(successor) {} + : SlowPathCode(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); @@ -176,7 +174,6 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; } private: - HSuspendCheck* const instruction_; HBasicBlock* const successor_; Label return_label_; @@ -186,7 +183,7 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { class BoundsCheckSlowPathX86_64 : public SlowPathCode { public: explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction) - : instruction_(instruction) {} + : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -218,8 +215,6 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; } private: - HBoundsCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64); }; @@ -229,7 +224,7 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { HInstruction* at, uint32_t dex_pc, bool do_clinit) - : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } @@ -286,7 +281,7 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { class LoadStringSlowPathX86_64 : public SlowPathCode { public: - explicit LoadStringSlowPathX86_64(HLoadString* instruction) : instruction_(instruction) {} + explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -297,8 +292,8 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), - Immediate(instruction_->GetStringIndex())); + const 
uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index)); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), @@ -312,15 +307,13 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; } private: - HLoadString* const instruction_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64); }; class TypeCheckSlowPathX86_64 : public SlowPathCode { public: TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal) - : instruction_(instruction), is_fatal_(is_fatal) {} + : SlowPathCode(instruction), is_fatal_(is_fatal) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -379,7 +372,6 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { bool IsFatal() const OVERRIDE { return is_fatal_; } private: - HInstruction* const instruction_; const bool is_fatal_; DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64); @@ -388,7 +380,7 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { class DeoptimizationSlowPathX86_64 : public SlowPathCode { public: explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction) - : instruction_(instruction) {} + : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); @@ -404,13 +396,12 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } private: - HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); }; class ArraySetSlowPathX86_64 : public SlowPathCode { public: - explicit ArraySetSlowPathX86_64(HInstruction* instruction) : instruction_(instruction) {} + explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -449,8 +440,6 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; } private: - HInstruction* const instruction_; - DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); }; @@ -458,7 +447,7 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { public: ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj) - : instruction_(instruction), out_(out), obj_(obj) { + : SlowPathCode(instruction), out_(out), obj_(obj) { DCHECK(kEmitCompilerReadBarrier); } @@ -497,7 +486,6 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { } private: - HInstruction* const instruction_; const Location out_; const Location obj_; @@ -513,7 +501,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { Location obj, uint32_t offset, Location index) - : instruction_(instruction), + : SlowPathCode(instruction), out_(out), ref_(ref), obj_(obj), @@ -667,7 +655,6 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { UNREACHABLE(); } - HInstruction* const instruction_; const Location out_; const Location ref_; const Location obj_; @@ -684,7 +671,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { public: 
ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) { + : SlowPathCode(instruction), out_(out), root_(root) { DCHECK(kEmitCompilerReadBarrier); } @@ -716,7 +703,6 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; } private: - HInstruction* const instruction_; const Location out_; const Location root_; @@ -1632,11 +1618,11 @@ void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { - if (codegen_->HasStackMapAtCurrentPc()) { - // Ensure that we do not collide with the stack map of the previous instruction. - __ nop(); - } - codegen_->RecordPcInfo(info, info->GetDexPc()); + codegen_->MaybeRecordNativeDebugInfo(info, info->GetDexPc()); +} + +void CodeGeneratorX86_64::GenerateNop() { + __ nop(); } void LocationsBuilderX86_64::VisitLocal(HLocal* local) { @@ -3546,7 +3532,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in } else { SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64( - out.AsRegister(), type, is_div); + instruction, out.AsRegister(), type, is_div); codegen_->AddSlowPath(slow_path); CpuRegister second_reg = second.AsRegister<CpuRegister>(); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 97f6f84236..b3d27e194a 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -513,6 +513,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { } } + void GenerateNop(); + private: // Factored implementation of GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index c0263e4e5b..b9638f2027 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -436,17 +436,23 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << (try_boundary->IsEntry() ? 
"entry" : "exit"); } +#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) + void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetOpKind(); + } +#endif + #ifdef ART_ENABLE_CODEGEN_arm64 + void VisitArm64BitwiseNegatedRight(HArm64BitwiseNegatedRight* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetOpKind(); + } + void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE { StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind(); if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) { StartAttributeStream("shift") << instruction->GetShiftAmount(); } } - - void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE { - StartAttributeStream("kind") << instruction->GetOpKind(); - } #endif bool IsPass(const char* name) { diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 37f2d79536..82a898a9f1 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -379,7 +379,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferShl(Inducti Primitive::Type type) { // Transfer over a shift left: treat shift by restricted constant as equivalent multiplication. int64_t value = -1; - if (a != nullptr && IsIntAndGet(b, &value)) { + if (a != nullptr && IsExact(b, &value)) { // Obtain the constant needed for the multiplication. This yields an existing instruction // if the constants is already there. Otherwise, this has a side effect on the HIR. // The restriction on the shift factor avoids generating a negative constant @@ -546,14 +546,17 @@ void HInductionVarAnalysis::VisitCondition(HLoopInformation* loop, // Analyze condition with induction at left-hand-side (e.g. i < U). InductionInfo* lower_expr = a->op_b; InductionInfo* upper_expr = b; - InductionInfo* stride = a->op_a; + InductionInfo* stride_expr = a->op_a; + // Constant stride? int64_t stride_value = 0; - if (!IsIntAndGet(stride, &stride_value)) { + if (!IsExact(stride_expr, &stride_value)) { return; } - // Rewrite condition i != U into i < U or i > U if end condition is reached exactly. - if (cmp == kCondNE && ((stride_value == +1 && IsTaken(lower_expr, upper_expr, kCondLT)) || - (stride_value == -1 && IsTaken(lower_expr, upper_expr, kCondGT)))) { + // Rewrite condition i != U into strict end condition i < U or i > U if this end condition + // is reached exactly (tested by verifying if the loop has a unit stride and the non-strict + // condition would be always taken). + if (cmp == kCondNE && ((stride_value == +1 && IsTaken(lower_expr, upper_expr, kCondLE)) || + (stride_value == -1 && IsTaken(lower_expr, upper_expr, kCondGE)))) { cmp = stride_value > 0 ? 
kCondLT : kCondGT; } // Normalize a linear loop control with a nonzero stride: @@ -561,7 +564,7 @@ void HInductionVarAnalysis::VisitCondition(HLoopInformation* loop, // stride < 0, either i > U or i >= U if ((stride_value > 0 && (cmp == kCondLT || cmp == kCondLE)) || (stride_value < 0 && (cmp == kCondGT || cmp == kCondGE))) { - VisitTripCount(loop, lower_expr, upper_expr, stride, stride_value, type, cmp); + VisitTripCount(loop, lower_expr, upper_expr, stride_expr, stride_value, type, cmp); } } } @@ -569,7 +572,7 @@ void HInductionVarAnalysis::VisitCondition(HLoopInformation* loop, void HInductionVarAnalysis::VisitTripCount(HLoopInformation* loop, InductionInfo* lower_expr, InductionInfo* upper_expr, - InductionInfo* stride, + InductionInfo* stride_expr, int64_t stride_value, Primitive::Type type, IfCondition cmp) { @@ -612,9 +615,10 @@ void HInductionVarAnalysis::VisitTripCount(HLoopInformation* loop, trip_count = CreateInvariantOp(kAdd, trip_count, CreateConstant(1, type)); } // Compensate for stride. - trip_count = CreateInvariantOp(kAdd, trip_count, stride); + trip_count = CreateInvariantOp(kAdd, trip_count, stride_expr); } - trip_count = CreateInvariantOp(kDiv, CreateInvariantOp(kSub, trip_count, lower_expr), stride); + trip_count = CreateInvariantOp( + kDiv, CreateInvariantOp(kSub, trip_count, lower_expr), stride_expr); // Assign the trip-count expression to the loop control. Clients that use the information // should be aware that the expression is only valid under the conditions listed above. InductionOp tcKind = kTripCountInBodyUnsafe; // needs both tests @@ -644,14 +648,25 @@ bool HInductionVarAnalysis::IsTaken(InductionInfo* lower_expr, IfCondition cmp) { int64_t lower_value; int64_t upper_value; - if (IsIntAndGet(lower_expr, &lower_value) && IsIntAndGet(upper_expr, &upper_value)) { - switch (cmp) { - case kCondLT: return lower_value < upper_value; - case kCondLE: return lower_value <= upper_value; - case kCondGT: return lower_value > upper_value; - case kCondGE: return lower_value >= upper_value; - default: LOG(FATAL) << "CONDITION UNREACHABLE"; - } + switch (cmp) { + case kCondLT: + return IsAtMost(lower_expr, &lower_value) + && IsAtLeast(upper_expr, &upper_value) + && lower_value < upper_value; + case kCondLE: + return IsAtMost(lower_expr, &lower_value) + && IsAtLeast(upper_expr, &upper_value) + && lower_value <= upper_value; + case kCondGT: + return IsAtLeast(lower_expr, &lower_value) + && IsAtMost(upper_expr, &upper_value) + && lower_value > upper_value; + case kCondGE: + return IsAtLeast(lower_expr, &lower_value) + && IsAtMost(upper_expr, &upper_value) + && lower_value >= upper_value; + default: + LOG(FATAL) << "CONDITION UNREACHABLE"; } return false; // not certain, may be untaken } @@ -660,25 +675,23 @@ bool HInductionVarAnalysis::IsFinite(InductionInfo* upper_expr, int64_t stride_value, Primitive::Type type, IfCondition cmp) { - const int64_t min = type == Primitive::kPrimInt - ? std::numeric_limits<int32_t>::min() - : std::numeric_limits<int64_t>::min(); - const int64_t max = type == Primitive::kPrimInt - ? std::numeric_limits<int32_t>::max() - : std::numeric_limits<int64_t>::max(); + const int64_t min = type == Primitive::kPrimInt ? std::numeric_limits<int32_t>::min() + : std::numeric_limits<int64_t>::min(); + const int64_t max = type == Primitive::kPrimInt ? std::numeric_limits<int32_t>::max() + : std::numeric_limits<int64_t>::max(); // Some rules under which it is certain at compile-time that the loop is finite. 
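The rewrite of `i != U` in VisitCondition is now guarded by IsTaken(lower_expr, upper_expr, kCondLE) (or kCondGE for a descending loop) rather than the strict comparison: with a unit stride, knowing that the non-strict end condition always holds on entry is exactly what guarantees the induction cannot step over U, so the inequality behaves like a strict bound. A standalone sketch of that argument over plain integers (not InductionInfo nodes):

#include <cassert>
#include <cstdint>

// Counts iterations of "for (i = lower; i != upper; ++i)" under the same
// precondition the analysis checks: unit stride and lower <= upper on entry.
int64_t TripCountForNotEqual(int64_t lower, int64_t upper) {
  assert(lower <= upper);  // corresponds to IsTaken(lower_expr, upper_expr, kCondLE)
  int64_t trips = 0;
  for (int64_t i = lower; i != upper; ++i) {  // stride_value == +1
    ++trips;
  }
  return trips;  // identical to the trip count of "for (i = lower; i < upper; ++i)"
}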
int64_t value; switch (cmp) { case kCondLT: return stride_value == 1 || - (IsIntAndGet(upper_expr, &value) && value <= (max - stride_value + 1)); + (IsAtMost(upper_expr, &value) && value <= (max - stride_value + 1)); case kCondLE: - return (IsIntAndGet(upper_expr, &value) && value <= (max - stride_value)); + return (IsAtMost(upper_expr, &value) && value <= (max - stride_value)); case kCondGT: return stride_value == -1 || - (IsIntAndGet(upper_expr, &value) && value >= (min - stride_value - 1)); + (IsAtLeast(upper_expr, &value) && value >= (min - stride_value - 1)); case kCondGE: - return (IsIntAndGet(upper_expr, &value) && value >= (min - stride_value)); + return (IsAtLeast(upper_expr, &value) && value >= (min - stride_value)); default: LOG(FATAL) << "CONDITION UNREACHABLE"; } @@ -733,7 +746,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv // More exhaustive simplifications are done by later phases once induction nodes are // translated back into HIR code (e.g. by loop optimizations or BCE). int64_t value = -1; - if (IsIntAndGet(a, &value)) { + if (IsExact(a, &value)) { if (value == 0) { // Simplify 0 + b = b, 0 * b = 0. if (op == kAdd) { @@ -750,7 +763,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv } } } - if (IsIntAndGet(b, &value)) { + if (IsExact(b, &value)) { if (value == 0) { // Simplify a + 0 = a, a - 0 = a, a * 0 = 0, -0 = 0. if (op == kAdd || op == kSub) { @@ -784,29 +797,16 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr); } -bool HInductionVarAnalysis::IsIntAndGet(InductionInfo* info, int64_t* value) { - if (info != nullptr && info->induction_class == kInvariant) { - // A direct constant fetch. - if (info->operation == kFetch) { - DCHECK(info->fetch); - if (info->fetch->IsIntConstant()) { - *value = info->fetch->AsIntConstant()->GetValue(); - return true; - } else if (info->fetch->IsLongConstant()) { - *value = info->fetch->AsLongConstant()->GetValue(); - return true; - } - } - // Use range analysis to resolve compound values. - InductionVarRange range(this); - int32_t min_val = 0; - int32_t max_val = 0; - if (range.IsConstantRange(info, &min_val, &max_val) && min_val == max_val) { - *value = min_val; - return true; - } - } - return false; +bool HInductionVarAnalysis::IsExact(InductionInfo* info, int64_t* value) { + return InductionVarRange(this).IsConstant(info, InductionVarRange::kExact, value); +} + +bool HInductionVarAnalysis::IsAtMost(InductionInfo* info, int64_t* value) { + return InductionVarRange(this).IsConstant(info, InductionVarRange::kAtMost, value); +} + +bool HInductionVarAnalysis::IsAtLeast(InductionInfo* info, int64_t* value) { + return InductionVarRange(this).IsConstant(info, InductionVarRange::kAtLeast, value); } bool HInductionVarAnalysis::InductionEqual(InductionInfo* info1, diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h index 84d5d82568..94d2646aec 100644 --- a/compiler/optimizing/induction_var_analysis.h +++ b/compiler/optimizing/induction_var_analysis.h @@ -189,7 +189,9 @@ class HInductionVarAnalysis : public HOptimization { InductionInfo* CreateSimplifiedInvariant(InductionOp op, InductionInfo* a, InductionInfo* b); // Constants. 
- bool IsIntAndGet(InductionInfo* info, int64_t* value); + bool IsExact(InductionInfo* info, /*out*/ int64_t* value); + bool IsAtMost(InductionInfo* info, /*out*/ int64_t* value); + bool IsAtLeast(InductionInfo* info, /*out*/ int64_t* value); // Helpers. static bool InductionEqual(InductionInfo* info1, InductionInfo* info2); diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 9566c29adf..f9b6910acd 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -45,17 +45,14 @@ static bool IsSafeDiv(int32_t c1, int32_t c2) { return c2 != 0 && CanLongValueFitIntoInt(static_cast<int64_t>(c1) / static_cast<int64_t>(c2)); } -/** Returns true for 32/64-bit integral constant. */ -static bool IsIntAndGet(HInstruction* instruction, int32_t* value) { +/** Returns true for 32/64-bit constant instruction. */ +static bool IsIntAndGet(HInstruction* instruction, int64_t* value) { if (instruction->IsIntConstant()) { *value = instruction->AsIntConstant()->GetValue(); return true; } else if (instruction->IsLongConstant()) { - const int64_t c = instruction->AsLongConstant()->GetValue(); - if (CanLongValueFitIntoInt(c)) { - *value = static_cast<int32_t>(c); - return true; - } + *value = instruction->AsLongConstant()->GetValue(); + return true; } return false; } @@ -65,8 +62,9 @@ static bool IsIntAndGet(HInstruction* instruction, int32_t* value) { * because length >= 0 is true. This makes it more likely the bound is useful to clients. */ static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) { - int32_t value; - if (v.a_constant > 1 && + int64_t value; + if (v.is_known && + v.a_constant > 1 && v.instruction->IsDiv() && v.instruction->InputAt(0)->IsArrayLength() && IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) { @@ -75,6 +73,16 @@ static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) { return v; } +/** Helper method to test for a constant value. */ +static bool IsConstantValue(InductionVarRange::Value v) { + return v.is_known && v.a_constant == 0; +} + +/** Helper method to test for same constant value. */ +static bool IsSameConstantValue(InductionVarRange::Value v1, InductionVarRange::Value v2) { + return IsConstantValue(v1) && IsConstantValue(v2) && v1.b_constant == v2.b_constant; +} + /** Helper method to insert an instruction. */ static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { DCHECK(block != nullptr); @@ -99,29 +107,45 @@ bool InductionVarRange::GetInductionRange(HInstruction* context, /*out*/Value* max_val, /*out*/bool* needs_finite_test) { HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (loop != nullptr) { - // Set up loop information. - HBasicBlock* header = loop->GetHeader(); - bool in_body = context->GetBlock() != header; - HInductionVarAnalysis::InductionInfo* info = - induction_analysis_->LookupInfo(loop, instruction); - HInductionVarAnalysis::InductionInfo* trip = - induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); - // Find range. 
- *min_val = GetVal(info, trip, in_body, /* is_min */ true); - *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false)); - *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); - return true; + if (loop == nullptr) { + return false; // no loop + } + HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction); + if (info == nullptr) { + return false; // no induction information } - return false; // Nothing known + // Set up loop information. + HBasicBlock* header = loop->GetHeader(); + bool in_body = context->GetBlock() != header; + HInductionVarAnalysis::InductionInfo* trip = + induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); + // Find range. + *min_val = GetVal(info, trip, in_body, /* is_min */ true); + *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false)); + *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); + return true; } -bool InductionVarRange::RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) const { - Value v1 = RefineOuter(*min_val, /* is_min */ true); - Value v2 = RefineOuter(*max_val, /* is_min */ false); - if (v1.instruction != min_val->instruction || v2.instruction != max_val->instruction) { - *min_val = v1; - *max_val = v2; +bool InductionVarRange::RefineOuter(/*in-out*/ Value* min_val, + /*in-out*/ Value* max_val) const { + Value v1_min = RefineOuter(*min_val, /* is_min */ true); + Value v2_max = RefineOuter(*max_val, /* is_min */ false); + // The refined range is safe if both sides refine the same instruction. Otherwise, since two + // different ranges are combined, the new refined range is safe to pass back to the client if + // the extremes of the computed ranges ensure no arithmetic wrap-around anomalies occur. + if (min_val->instruction != max_val->instruction) { + Value v1_max = RefineOuter(*min_val, /* is_min */ false); + Value v2_min = RefineOuter(*max_val, /* is_min */ true); + if (!IsConstantValue(v1_max) || + !IsConstantValue(v2_min) || + v1_max.b_constant > v2_min.b_constant) { + return false; + } + } + // Did something change? + if (v1_min.instruction != min_val->instruction || v2_max.instruction != max_val->instruction) { + *min_val = v1_min; + *max_val = v2_max; return true; } return false; @@ -164,6 +188,46 @@ void InductionVarRange::GenerateTakenTest(HInstruction* context, // Private class methods. // +bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info, + ConstantRequest request, + /*out*/ int64_t *value) const { + if (info != nullptr) { + // A direct 32-bit or 64-bit constant fetch. This immediately satisfies + // any of the three requests (kExact, kAtMost, and KAtLeast). + if (info->induction_class == HInductionVarAnalysis::kInvariant && + info->operation == HInductionVarAnalysis::kFetch) { + if (IsIntAndGet(info->fetch, value)) { + return true; + } + } + // Try range analysis while traversing outward on loops. + bool in_body = true; // no known trip count + Value v_min = GetVal(info, nullptr, in_body, /* is_min */ true); + Value v_max = GetVal(info, nullptr, in_body, /* is_min */ false); + do { + // Make sure *both* extremes are known to avoid arithmetic wrap-around anomalies. 
+ if (IsConstantValue(v_min) && IsConstantValue(v_max) && v_min.b_constant <= v_max.b_constant) { + if ((request == kExact && v_min.b_constant == v_max.b_constant) || request == kAtMost) { + *value = v_max.b_constant; + return true; + } else if (request == kAtLeast) { + *value = v_min.b_constant; + return true; + } + } + } while (RefineOuter(&v_min, &v_max)); + // Exploit array length + c >= c, with c <= 0 to avoid arithmetic wrap-around anomalies + // (e.g. array length == maxint and c == 1 would yield minint). + if (request == kAtLeast) { + if (v_min.a_constant == 1 && v_min.b_constant <= 0 && v_min.instruction->IsArrayLength()) { + *value = v_min.b_constant; + return true; + } + } + } + return false; +} + bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const { if (info != nullptr) { if (info->induction_class == HInductionVarAnalysis::kLinear) { @@ -206,12 +270,10 @@ InductionVarRange::Value InductionVarRange::GetLinear(HInductionVarAnalysis::Ind if (trip != nullptr) { HInductionVarAnalysis::InductionInfo* trip_expr = trip->op_a; if (trip_expr->operation == HInductionVarAnalysis::kSub) { - int32_t min_value = 0; - int32_t stride_value = 0; - if (IsConstantRange(info->op_a, &min_value, &stride_value) && min_value == stride_value) { + int64_t stride_value = 0; + if (IsConstant(info->op_a, kExact, &stride_value)) { if (!is_min && stride_value == 1) { - // Test original trip's negative operand (trip_expr->op_b) against - // the offset of the linear induction. + // Test original trip's negative operand (trip_expr->op_b) against offset of induction. if (HInductionVarAnalysis::InductionEqual(trip_expr->op_b, info->op_b)) { // Analyze cancelled trip with just the positive operand (trip_expr->op_a). HInductionVarAnalysis::InductionInfo cancelled_trip( @@ -219,8 +281,7 @@ InductionVarRange::Value InductionVarRange::GetLinear(HInductionVarAnalysis::Ind return GetVal(&cancelled_trip, trip, in_body, is_min); } } else if (is_min && stride_value == -1) { - // Test original trip's positive operand (trip_expr->op_a) against - // the offset of the linear induction. + // Test original trip's positive operand (trip_expr->op_a) against offset of induction. if (HInductionVarAnalysis::InductionEqual(trip_expr->op_a, info->op_b)) { // Analyze cancelled trip with just the negative operand (trip_expr->op_b). HInductionVarAnalysis::InductionInfo neg( @@ -248,14 +309,16 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, bool is_min) const { // Detect constants and chase the fetch a bit deeper into the HIR tree, so that it becomes // more likely range analysis will compare the same instructions as terminal nodes. 
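The new IsConstant() entry point replaces the old exact-constant lookups and serves three kinds of requests. Reduced to a plain [min, max] interval (a simplification; the real code derives the bounds via GetVal and refines them outward as shown above), the intended semantics are:

#include <cstdint>

enum ConstantRequest { kExact, kAtMost, kAtLeast };

// Given known interval bounds, report the requested constant when it exists:
// kExact needs a single-point range, kAtMost returns a safe upper bound,
// kAtLeast returns a safe lower bound.
bool IsConstantOfInterval(int64_t min, int64_t max, ConstantRequest request, int64_t* value) {
  if (min > max) return false;  // no usable range
  switch (request) {
    case kExact:
      if (min != max) return false;
      *value = min;  // the single possible value
      return true;
    case kAtMost:
      *value = max;
      return true;
    case kAtLeast:
      *value = min;
      return true;
  }
  return false;
}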
- int32_t value; - if (IsIntAndGet(instruction, &value)) { - return Value(value); + int64_t value; + if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) { + return Value(static_cast<int32_t>(value)); } else if (instruction->IsAdd()) { - if (IsIntAndGet(instruction->InputAt(0), &value)) { - return AddValue(Value(value), GetFetch(instruction->InputAt(1), trip, in_body, is_min)); - } else if (IsIntAndGet(instruction->InputAt(1), &value)) { - return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(value)); + if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) { + return AddValue(Value(static_cast<int32_t>(value)), + GetFetch(instruction->InputAt(1), trip, in_body, is_min)); + } else if (IsIntAndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) { + return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), + Value(static_cast<int32_t>(value))); } } else if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) { return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min); @@ -331,29 +394,30 @@ InductionVarRange::Value InductionVarRange::GetMul(HInductionVarAnalysis::Induct Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); - // Try to refine certain failure. - if (v1_min.a_constant && v1_max.a_constant) { - v1_min = RefineOuter(v1_min, /* is_min */ true); - v1_max = RefineOuter(v1_max, /* is_min */ false); - } - // Positive or negative range? - if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) { - // Positive range vs. positive or negative range. - if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { - return is_min ? MulValue(v1_min, v2_min) - : MulValue(v1_max, v2_max); - } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) { - return is_min ? MulValue(v1_max, v2_min) - : MulValue(v1_min, v2_max); + // Try to refine first operand. + if (!IsConstantValue(v1_min) && !IsConstantValue(v1_max)) { + RefineOuter(&v1_min, &v1_max); + } + // Constant times range. + if (IsSameConstantValue(v1_min, v1_max)) { + return MulRangeAndConstant(v2_min, v2_max, v1_min, is_min); + } else if (IsSameConstantValue(v2_min, v2_max)) { + return MulRangeAndConstant(v1_min, v1_max, v2_min, is_min); + } + // Positive range vs. positive or negative range. + if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) { + if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { + return is_min ? MulValue(v1_min, v2_min) : MulValue(v1_max, v2_max); + } else if (IsConstantValue(v2_max) && v2_max.b_constant <= 0) { + return is_min ? MulValue(v1_max, v2_min) : MulValue(v1_min, v2_max); } - } else if (v1_max.is_known && v1_max.a_constant == 0 && v1_max.b_constant <= 0) { - // Negative range vs. positive or negative range. - if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { - return is_min ? MulValue(v1_min, v2_max) - : MulValue(v1_max, v2_min); - } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) { - return is_min ? MulValue(v1_max, v2_max) - : MulValue(v1_min, v2_min); + } + // Negative range vs. positive or negative range. + if (IsConstantValue(v1_max) && v1_max.b_constant <= 0) { + if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { + return is_min ? 
MulValue(v1_min, v2_max) : MulValue(v1_max, v2_min); + } else if (IsConstantValue(v2_max) && v2_max.b_constant <= 0) { + return is_min ? MulValue(v1_max, v2_max) : MulValue(v1_min, v2_min); } } return Value(); @@ -368,43 +432,41 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); - // Positive or negative range? - if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) { - // Positive range vs. positive or negative range. - if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { - return is_min ? DivValue(v1_min, v2_max) - : DivValue(v1_max, v2_min); - } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) { - return is_min ? DivValue(v1_max, v2_max) - : DivValue(v1_min, v2_min); + // Range divided by constant. + if (IsSameConstantValue(v2_min, v2_max)) { + return DivRangeAndConstant(v1_min, v1_max, v2_min, is_min); + } + // Positive range vs. positive or negative range. + if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) { + if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { + return is_min ? DivValue(v1_min, v2_max) : DivValue(v1_max, v2_min); + } else if (IsConstantValue(v2_max) && v2_max.b_constant <= 0) { + return is_min ? DivValue(v1_max, v2_max) : DivValue(v1_min, v2_min); } - } else if (v1_max.is_known && v1_max.a_constant == 0 && v1_max.b_constant <= 0) { - // Negative range vs. positive or negative range. - if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { - return is_min ? DivValue(v1_min, v2_min) - : DivValue(v1_max, v2_max); - } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) { - return is_min ? DivValue(v1_max, v2_min) - : DivValue(v1_min, v2_max); + } + // Negative range vs. positive or negative range. + if (IsConstantValue(v1_max) && v1_max.b_constant <= 0) { + if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { + return is_min ? DivValue(v1_min, v2_min) : DivValue(v1_max, v2_max); + } else if (IsConstantValue(v2_max) && v2_max.b_constant <= 0) { + return is_min ? DivValue(v1_max, v2_min) : DivValue(v1_min, v2_max); } } return Value(); } -bool InductionVarRange::IsConstantRange(HInductionVarAnalysis::InductionInfo* info, - int32_t *min_value, - int32_t *max_value) const { - bool in_body = true; // no known trip count - Value v_min = GetVal(info, nullptr, in_body, /* is_min */ true); - Value v_max = GetVal(info, nullptr, in_body, /* is_min */ false); - do { - if (v_min.is_known && v_min.a_constant == 0 && v_max.is_known && v_max.a_constant == 0) { - *min_value = v_min.b_constant; - *max_value = v_max.b_constant; - return true; - } - } while (RefineOuter(&v_min, &v_max)); - return false; +InductionVarRange::Value InductionVarRange::MulRangeAndConstant(Value v_min, + Value v_max, + Value c, + bool is_min) const { + return is_min == (c.b_constant >= 0) ? MulValue(v_min, c) : MulValue(v_max, c); +} + +InductionVarRange::Value InductionVarRange::DivRangeAndConstant(Value v_min, + Value v_max, + Value c, + bool is_min) const { + return is_min == (c.b_constant >= 0) ? 
DivValue(v_min, c) : DivValue(v_max, c); } InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) const { @@ -471,22 +533,25 @@ InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is } InductionVarRange::Value InductionVarRange::RefineOuter(Value v, bool is_min) const { - if (v.instruction != nullptr) { - HLoopInformation* loop = - v.instruction->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (loop != nullptr) { - // Set up loop information. - bool in_body = true; // use is always in body of outer loop - HInductionVarAnalysis::InductionInfo* info = - induction_analysis_->LookupInfo(loop, v.instruction); - HInductionVarAnalysis::InductionInfo* trip = - induction_analysis_->LookupInfo(loop, loop->GetHeader()->GetLastInstruction()); - // Try to refine "a x instruction + b" with outer loop range information on instruction. - return AddValue(MulValue(Value(v.a_constant), GetVal(info, trip, in_body, is_min)), - Value(v.b_constant)); - } + if (v.instruction == nullptr) { + return v; // nothing to refine } - return v; + HLoopInformation* loop = + v.instruction->GetBlock()->GetLoopInformation(); // closest enveloping loop + if (loop == nullptr) { + return v; // no loop + } + HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, v.instruction); + if (info == nullptr) { + return v; // no induction information + } + // Set up loop information. + HBasicBlock* header = loop->GetHeader(); + bool in_body = true; // inner always in more outer + HInductionVarAnalysis::InductionInfo* trip = + induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); + // Try to refine "a x instruction + b" with outer loop range information on instruction. + return AddValue(MulValue(Value(v.a_constant), GetVal(info, trip, in_body, is_min)), Value(v.b_constant)); } bool InductionVarRange::GenerateCode(HInstruction* context, @@ -499,44 +564,45 @@ bool InductionVarRange::GenerateCode(HInstruction* context, /*out*/bool* needs_finite_test, /*out*/bool* needs_taken_test) const { HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (loop != nullptr) { - // Set up loop information. - HBasicBlock* header = loop->GetHeader(); - bool in_body = context->GetBlock() != header; - HInductionVarAnalysis::InductionInfo* info = - induction_analysis_->LookupInfo(loop, instruction); - if (info == nullptr) { - return false; // nothing to analyze - } - HInductionVarAnalysis::InductionInfo* trip = - induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); - // Determine what tests are needed. A finite test is needed if the evaluation code uses the - // trip-count and the loop maybe unsafe (because in such cases, the index could "overshoot" - // the computed range). A taken test is needed for any unknown trip-count, even if evaluation - // code does not use the trip-count explicitly (since there could be an implicit relation - // between e.g. an invariant subscript and a not-taken condition). - *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); - *needs_taken_test = IsBodyTripCount(trip); - // Code generation for taken test: generate the code when requested or otherwise analyze - // if code generation is feasible when taken test is needed. 
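The MulRangeAndConstant/DivRangeAndConstant helpers introduced above encode one sign rule: with a non-negative constant the minimum of the result comes from the range's low endpoint, and a negative constant swaps the endpoints. A self-contained sketch over plain integers, checked against the values the new GetMulMin/GetMulMax tests expect:

#include <cassert>
#include <cstdint>

// is_min == (c >= 0) selects the low endpoint, otherwise the high endpoint.
int64_t MulRangeAndConstantSketch(int64_t v_min, int64_t v_max, int64_t c, bool is_min) {
  return (is_min == (c >= 0)) ? v_min * c : v_max * c;
}

int main() {
  // Mirrors the added test expectations: 2 * [-7, 8] and -2 * [-7, 8].
  assert(MulRangeAndConstantSketch(-7, 8, 2, /* is_min */ true) == -14);
  assert(MulRangeAndConstantSketch(-7, 8, -2, /* is_min */ true) == -16);
  assert(MulRangeAndConstantSketch(-7, 8, 2, /* is_min */ false) == 16);
  assert(MulRangeAndConstantSketch(-7, 8, -2, /* is_min */ false) == 14);
  return 0;
}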
- if (taken_test != nullptr) { - return GenerateCode( - trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min */ false); - } else if (*needs_taken_test) { - if (!GenerateCode( - trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min */ false)) { - return false; - } + if (loop == nullptr) { + return false; // no loop + } + HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction); + if (info == nullptr) { + return false; // no induction information + } + // Set up loop information. + HBasicBlock* header = loop->GetHeader(); + bool in_body = context->GetBlock() != header; + HInductionVarAnalysis::InductionInfo* trip = + induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); + if (trip == nullptr) { + return false; // codegen relies on trip count + } + // Determine what tests are needed. A finite test is needed if the evaluation code uses the + // trip-count and the loop maybe unsafe (because in such cases, the index could "overshoot" + // the computed range). A taken test is needed for any unknown trip-count, even if evaluation + // code does not use the trip-count explicitly (since there could be an implicit relation + // between e.g. an invariant subscript and a not-taken condition). + *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); + *needs_taken_test = IsBodyTripCount(trip); + // Code generation for taken test: generate the code when requested or otherwise analyze + // if code generation is feasible when taken test is needed. + if (taken_test != nullptr) { + return GenerateCode(trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min */ false); + } else if (*needs_taken_test) { + if (!GenerateCode( + trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min */ false)) { + return false; } - // Code generation for lower and upper. - return - // Success on lower if invariant (not set), or code can be generated. - ((info->induction_class == HInductionVarAnalysis::kInvariant) || - GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) && - // And success on upper. - GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false); } - return false; + // Code generation for lower and upper. + return + // Success on lower if invariant (not set), or code can be generated. + ((info->induction_class == HInductionVarAnalysis::kInvariant) || + GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) && + // And success on upper. + GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false); } bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, @@ -639,9 +705,8 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, case HInductionVarAnalysis::kLinear: { // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only // to avoid arithmetic wrap-around situations that are hard to guard against. - int32_t min_value = 0; - int32_t stride_value = 0; - if (IsConstantRange(info->op_a, &min_value, &stride_value) && min_value == stride_value) { + int64_t stride_value = 0; + if (IsConstant(info->op_a, kExact, &stride_value)) { if (stride_value == 1 || stride_value == -1) { const bool is_min_a = stride_value == 1 ? 
is_min : !is_min; if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) && @@ -666,7 +731,7 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, // Wrap-around and periodic inductions are restricted to constants only, so that extreme // values are easy to test at runtime without complications of arithmetic wrap-around. Value extreme = GetVal(info, trip, in_body, is_min); - if (extreme.is_known && extreme.a_constant == 0) { + if (IsConstantValue(extreme)) { if (graph != nullptr) { *result = graph->GetIntConstant(extreme.b_constant); } diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 3cb7b4bfd5..0af41560ff 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -69,7 +69,8 @@ class InductionVarRange { /*out*/ bool* needs_finite_test); /** Refines the values with induction of next outer loop. Returns true on change. */ - bool RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) const; + bool RefineOuter(/*in-out*/ Value* min_val, + /*in-out*/ Value* max_val) const; /** * Returns true if range analysis is able to generate code for the lower and upper @@ -116,6 +117,23 @@ class InductionVarRange { /*out*/ HInstruction** taken_test); private: + /* + * Enum used in IsConstant() request. + */ + enum ConstantRequest { + kExact, + kAtMost, + kAtLeast + }; + + /** + * Returns true if exact or upper/lower bound on the given induction + * information is known as a 64-bit constant, which is returned in value. + */ + bool IsConstant(HInductionVarAnalysis::InductionInfo* info, + ConstantRequest request, + /*out*/ int64_t *value) const; + bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const; bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const; bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const; @@ -143,9 +161,8 @@ class InductionVarRange { bool in_body, bool is_min) const; - bool IsConstantRange(HInductionVarAnalysis::InductionInfo* info, - int32_t *min_value, - int32_t *max_value) const; + Value MulRangeAndConstant(Value v1, Value v2, Value c, bool is_min) const; + Value DivRangeAndConstant(Value v1, Value v2, Value c, bool is_min) const; Value AddValue(Value v1, Value v2) const; Value SubValue(Value v1, Value v2) const; diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index 55a654e301..c5c33bd9bc 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -215,10 +215,16 @@ class InductionVarRangeTest : public CommonCompilerTest { return range_.GetDiv(info1, info2, nullptr, /* in_body */ true, is_min); } - bool IsConstantRange(HInductionVarAnalysis::InductionInfo* info, - int32_t* min_value, - int32_t* max_value) { - return range_.IsConstantRange(info, min_value, max_value); + bool IsExact(HInductionVarAnalysis::InductionInfo* info, int64_t* value) { + return range_.IsConstant(info, InductionVarRange::kExact, value); + } + + bool IsAtMost(HInductionVarAnalysis::InductionInfo* info, int64_t* value) { + return range_.IsConstant(info, InductionVarRange::kAtMost, value); + } + + bool IsAtLeast(HInductionVarAnalysis::InductionInfo* info, int64_t* value) { + return range_.IsConstant(info, InductionVarRange::kAtLeast, value); } Value AddValue(Value v1, Value v2) { return range_.AddValue(v1, v2); } @@ -249,6 +255,34 @@ class InductionVarRangeTest : public CommonCompilerTest { // 
Tests on private methods. // +TEST_F(InductionVarRangeTest, IsConstant) { + int64_t value; + // Constant. + EXPECT_TRUE(IsExact(CreateConst(12345), &value)); + EXPECT_EQ(12345, value); + EXPECT_TRUE(IsAtMost(CreateConst(12345), &value)); + EXPECT_EQ(12345, value); + EXPECT_TRUE(IsAtLeast(CreateConst(12345), &value)); + EXPECT_EQ(12345, value); + // Constant trivial range. + EXPECT_TRUE(IsExact(CreateRange(111, 111), &value)); + EXPECT_EQ(111, value); + EXPECT_TRUE(IsAtMost(CreateRange(111, 111), &value)); + EXPECT_EQ(111, value); + EXPECT_TRUE(IsAtLeast(CreateRange(111, 111), &value)); + EXPECT_EQ(111, value); + // Constant non-trivial range. + EXPECT_FALSE(IsExact(CreateRange(11, 22), &value)); + EXPECT_TRUE(IsAtMost(CreateRange(11, 22), &value)); + EXPECT_EQ(22, value); + EXPECT_TRUE(IsAtLeast(CreateRange(11, 22), &value)); + EXPECT_EQ(11, value); + // Symbolic. + EXPECT_FALSE(IsExact(CreateFetch(x_), &value)); + EXPECT_FALSE(IsAtMost(CreateFetch(x_), &value)); + EXPECT_FALSE(IsAtLeast(CreateFetch(x_), &value)); +} + TEST_F(InductionVarRangeTest, TripCountProperties) { EXPECT_FALSE(NeedsTripCount(nullptr)); EXPECT_FALSE(NeedsTripCount(CreateConst(1))); @@ -367,6 +401,10 @@ TEST_F(InductionVarRangeTest, GetMinMaxPeriodic) { } TEST_F(InductionVarRangeTest, GetMulMin) { + ExpectEqual(Value(-14), GetMul(CreateConst(2), CreateRange(-7, 8), true)); + ExpectEqual(Value(-16), GetMul(CreateConst(-2), CreateRange(-7, 8), true)); + ExpectEqual(Value(-14), GetMul(CreateRange(-7, 8), CreateConst(2), true)); + ExpectEqual(Value(-16), GetMul(CreateRange(-7, 8), CreateConst(-2), true)); ExpectEqual(Value(6), GetMul(CreateRange(2, 10), CreateRange(3, 5), true)); ExpectEqual(Value(-50), GetMul(CreateRange(2, 10), CreateRange(-5, -3), true)); ExpectEqual(Value(), GetMul(CreateRange(2, 10), CreateRange(-1, 1), true)); @@ -379,6 +417,10 @@ TEST_F(InductionVarRangeTest, GetMulMin) { } TEST_F(InductionVarRangeTest, GetMulMax) { + ExpectEqual(Value(16), GetMul(CreateConst(2), CreateRange(-7, 8), false)); + ExpectEqual(Value(14), GetMul(CreateConst(-2), CreateRange(-7, 8), false)); + ExpectEqual(Value(16), GetMul(CreateRange(-7, 8), CreateConst(2), false)); + ExpectEqual(Value(14), GetMul(CreateRange(-7, 8), CreateConst(-2), false)); ExpectEqual(Value(50), GetMul(CreateRange(2, 10), CreateRange(3, 5), false)); ExpectEqual(Value(-6), GetMul(CreateRange(2, 10), CreateRange(-5, -3), false)); ExpectEqual(Value(), GetMul(CreateRange(2, 10), CreateRange(-1, 1), false)); @@ -391,6 +433,8 @@ TEST_F(InductionVarRangeTest, GetMulMax) { } TEST_F(InductionVarRangeTest, GetDivMin) { + ExpectEqual(Value(-5), GetDiv(CreateRange(-10, 20), CreateConst(2), true)); + ExpectEqual(Value(-10), GetDiv(CreateRange(-10, 20), CreateConst(-2), true)); ExpectEqual(Value(10), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), true)); ExpectEqual(Value(-500), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), true)); ExpectEqual(Value(), GetDiv(CreateRange(40, 1000), CreateRange(-1, 1), true)); @@ -403,6 +447,8 @@ TEST_F(InductionVarRangeTest, GetDivMin) { } TEST_F(InductionVarRangeTest, GetDivMax) { + ExpectEqual(Value(10), GetDiv(CreateRange(-10, 20), CreateConst(2), false)); + ExpectEqual(Value(5), GetDiv(CreateRange(-10, 20), CreateConst(-2), false)); ExpectEqual(Value(500), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), false)); ExpectEqual(Value(-10), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), false)); ExpectEqual(Value(), GetDiv(CreateRange(40, 1000), CreateRange(-1, 1), false)); @@ -414,18 +460,6 @@ 
TEST_F(InductionVarRangeTest, GetDivMax) { ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1, 1), false)); } -TEST_F(InductionVarRangeTest, IsConstantRange) { - int32_t min_value; - int32_t max_value; - ASSERT_TRUE(IsConstantRange(CreateConst(12345), &min_value, &max_value)); - EXPECT_EQ(12345, min_value); - EXPECT_EQ(12345, max_value); - ASSERT_TRUE(IsConstantRange(CreateRange(1, 2), &min_value, &max_value)); - EXPECT_EQ(1, min_value); - EXPECT_EQ(2, max_value); - EXPECT_FALSE(IsConstantRange(CreateFetch(x_), &min_value, &max_value)); -} - TEST_F(InductionVarRangeTest, AddValue) { ExpectEqual(Value(110), AddValue(Value(10), Value(100))); ExpectEqual(Value(-5), AddValue(Value(x_, 1, -4), Value(x_, -1, -1))); @@ -459,6 +493,24 @@ TEST_F(InductionVarRangeTest, MulValue) { ExpectEqual(Value(), MulValue(Value(90000), Value(-90000))); // unsafe } +TEST_F(InductionVarRangeTest, MulValueSpecial) { + const int32_t min_value = std::numeric_limits<int32_t>::min(); + const int32_t max_value = std::numeric_limits<int32_t>::max(); + + // Unsafe. + ExpectEqual(Value(), MulValue(Value(min_value), Value(min_value))); + ExpectEqual(Value(), MulValue(Value(min_value), Value(-1))); + ExpectEqual(Value(), MulValue(Value(min_value), Value(max_value))); + ExpectEqual(Value(), MulValue(Value(max_value), Value(max_value))); + + // Safe. + ExpectEqual(Value(min_value), MulValue(Value(min_value), Value(1))); + ExpectEqual(Value(max_value), MulValue(Value(max_value), Value(1))); + ExpectEqual(Value(-max_value), MulValue(Value(max_value), Value(-1))); + ExpectEqual(Value(-1), MulValue(Value(1), Value(-1))); + ExpectEqual(Value(1), MulValue(Value(-1), Value(-1))); +} + TEST_F(InductionVarRangeTest, DivValue) { ExpectEqual(Value(25), DivValue(Value(100), Value(4))); ExpectEqual(Value(), DivValue(Value(x_, 1, -4), Value(x_, 1, -1))); @@ -468,6 +520,23 @@ TEST_F(InductionVarRangeTest, DivValue) { ExpectEqual(Value(), DivValue(Value(1), Value(0))); // unsafe } +TEST_F(InductionVarRangeTest, DivValueSpecial) { + const int32_t min_value = std::numeric_limits<int32_t>::min(); + const int32_t max_value = std::numeric_limits<int32_t>::max(); + + // Unsafe. + ExpectEqual(Value(), DivValue(Value(min_value), Value(-1))); + + // Safe. + ExpectEqual(Value(1), DivValue(Value(min_value), Value(min_value))); + ExpectEqual(Value(1), DivValue(Value(max_value), Value(max_value))); + ExpectEqual(Value(min_value), DivValue(Value(min_value), Value(1))); + ExpectEqual(Value(max_value), DivValue(Value(max_value), Value(1))); + ExpectEqual(Value(-max_value), DivValue(Value(max_value), Value(-1))); + ExpectEqual(Value(-1), DivValue(Value(1), Value(-1))); + ExpectEqual(Value(1), DivValue(Value(-1), Value(-1))); +} + TEST_F(InductionVarRangeTest, MinValue) { ExpectEqual(Value(10), MinValue(Value(10), Value(100))); ExpectEqual(Value(x_, 1, -4), MinValue(Value(x_, 1, -4), Value(x_, 1, -1))); diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc new file mode 100644 index 0000000000..db1f9a79aa --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "instruction_simplifier_arm.h" +#include "instruction_simplifier_shared.h" + +namespace art { +namespace arm { + +void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) { + if (TryCombineMultiplyAccumulate(instruction, kArm)) { + RecordSimplification(); + } +} + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h new file mode 100644 index 0000000000..379b95d6ae --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_arm.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_ +#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { +namespace arm { + +class InstructionSimplifierArmVisitor : public HGraphVisitor { + public: + InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats) + : HGraphVisitor(graph), stats_(stats) {} + + private: + void RecordSimplification() { + if (stats_ != nullptr) { + stats_->RecordStat(kInstructionSimplificationsArch); + } + } + + void VisitMul(HMul* instruction) OVERRIDE; + + OptimizingCompilerStats* stats_; +}; + + +class InstructionSimplifierArm : public HOptimization { + public: + InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats) + : HOptimization(graph, "instruction_simplifier_arm", stats) {} + + void Run() OVERRIDE { + InstructionSimplifierArmVisitor visitor(graph_, stats_); + visitor.VisitReversePostOrder(); + } +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_ diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 4bcfc54791..c2bbdccc29 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -17,6 +17,7 @@ #include "instruction_simplifier_arm64.h" #include "common_arm64.h" +#include "instruction_simplifier_shared.h" #include "mirror/array-inl.h" namespace art { @@ -179,67 +180,53 @@ bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruc return true; } -bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns( - HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) { - DCHECK(Primitive::IsIntOrLongType(mul->GetType())); - DCHECK(input_binop->IsAdd() || input_binop->IsSub()); - 
DCHECK_NE(input_binop, input_other); - if (!input_binop->HasOnlyOneNonEnvironmentUse()) { - return false; - } - - // Try to interpret patterns like - // a * (b <+/-> 1) - // as - // (a * b) <+/-> a - HInstruction* input_a = input_other; - HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize. - HInstruction::InstructionKind op_kind; - - if (input_binop->IsAdd()) { - if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) { - // Interpret - // a * (b + 1) - // as - // (a * b) + a - input_b = input_binop->GetLeastConstantLeft(); - op_kind = HInstruction::kAdd; - } - } else { - DCHECK(input_binop->IsSub()); - if (input_binop->GetRight()->IsConstant() && - input_binop->GetRight()->AsConstant()->IsMinusOne()) { - // Interpret - // a * (b - (-1)) - // as - // a + (a * b) - input_b = input_binop->GetLeft(); - op_kind = HInstruction::kAdd; - } else if (input_binop->GetLeft()->IsConstant() && - input_binop->GetLeft()->AsConstant()->IsOne()) { - // Interpret - // a * (1 - b) - // as - // a - (a * b) - input_b = input_binop->GetRight(); - op_kind = HInstruction::kSub; +bool InstructionSimplifierArm64Visitor::TryMergeNegatedInput(HBinaryOperation* op) { + DCHECK(op->IsAnd() || op->IsOr() || op->IsXor()) << op->DebugName(); + HInstruction* left = op->GetLeft(); + HInstruction* right = op->GetRight(); + + // Only consider the case where there is exactly one Not, with 2 Not's De + // Morgan's laws should be applied instead. + if (left->IsNot() ^ right->IsNot()) { + HInstruction* hnot = (left->IsNot() ? left : right); + HInstruction* hother = (left->IsNot() ? right : left); + + // Only do the simplification if the Not has only one use and can thus be + // safely removed. Even though ARM64 negated bitwise operations do not have + // an immediate variant (only register), we still do the simplification when + // `hother` is a constant, because it removes an instruction if the constant + // cannot be encoded as an immediate: + // mov r0, #large_constant + // neg r2, r1 + // and r0, r0, r2 + // becomes: + // mov r0, #large_constant + // bic r0, r0, r1 + if (hnot->HasOnlyOneNonEnvironmentUse()) { + // Replace code looking like + // NOT tmp, mask + // AND dst, src, tmp (respectively ORR, EOR) + // with + // BIC dst, src, mask (respectively ORN, EON) + HInstruction* src = hnot->AsNot()->GetInput(); + + HArm64BitwiseNegatedRight* neg_op = new (GetGraph()->GetArena()) + HArm64BitwiseNegatedRight(op->GetType(), op->GetKind(), hother, src, op->GetDexPc()); + + op->GetBlock()->ReplaceAndRemoveInstructionWith(op, neg_op); + hnot->GetBlock()->RemoveInstruction(hnot); + RecordSimplification(); + return true; } } - if (input_b == nullptr) { - // We did not find a pattern we can optimize. 
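The comment block above spells out the AND case; the same shape with ORR and EOR maps onto ORN and EON. A standalone check of the underlying bitwise identities (plain integers, not HIR nodes), including the two-Not case that is deliberately left to De Morgan's laws:

#include <cassert>
#include <cstdint>

// AND with a negated right operand, i.e. what a single BIC instruction computes.
uint32_t Bic(uint32_t src, uint32_t mask) { return src & ~mask; }

int main() {
  const uint32_t a = 0x1234abcdu;
  const uint32_t b = 0x00ff00ffu;
  const uint32_t not_b = ~b;         // NOT tmp, b
  assert((a & not_b) == Bic(a, b));  // AND dst, a, tmp  ==>  BIC dst, a, b
  // With two negated inputs, De Morgan's law is the better rewrite instead:
  assert((~a & ~b) == ~(a | b));
  return 0;
}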
- return false; - } - - HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate( - mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc()); - - mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc); - input_binop->GetBlock()->RemoveInstruction(input_binop); - return false; } +void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) { + TryMergeNegatedInput(instruction); +} + void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { TryExtractArrayAccessAddress(instruction, instruction->GetArray(), @@ -255,76 +242,13 @@ void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) { } void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) { - Primitive::Type type = instruction->GetType(); - if (!Primitive::IsIntOrLongType(type)) { - return; - } - - HInstruction* use = instruction->HasNonEnvironmentUses() - ? instruction->GetUses().GetFirst()->GetUser() - : nullptr; - - if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) { - // Replace code looking like - // MUL tmp, x, y - // SUB dst, acc, tmp - // with - // MULSUB dst, acc, x, y - // Note that we do not want to (unconditionally) perform the merge when the - // multiplication has multiple uses and it can be merged in all of them. - // Multiple uses could happen on the same control-flow path, and we would - // then increase the amount of work. In the future we could try to evaluate - // whether all uses are on different control-flow paths (using dominance and - // reverse-dominance information) and only perform the merge when they are. - HInstruction* accumulator = nullptr; - HBinaryOperation* binop = use->AsBinaryOperation(); - HInstruction* binop_left = binop->GetLeft(); - HInstruction* binop_right = binop->GetRight(); - // Be careful after GVN. This should not happen since the `HMul` has only - // one use. - DCHECK_NE(binop_left, binop_right); - if (binop_right == instruction) { - accumulator = binop_left; - } else if (use->IsAdd()) { - DCHECK_EQ(binop_left, instruction); - accumulator = binop_right; - } - - if (accumulator != nullptr) { - HArm64MultiplyAccumulate* mulacc = - new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type, - binop->GetKind(), - accumulator, - instruction->GetLeft(), - instruction->GetRight()); - - binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); - DCHECK(!instruction->HasUses()); - instruction->GetBlock()->RemoveInstruction(instruction); - RecordSimplification(); - return; - } - } - - // Use multiply accumulate instruction for a few simple patterns. - // We prefer not applying the following transformations if the left and - // right inputs perform the same operation. - // We rely on GVN having squashed the inputs if appropriate. However the - // results are still correct even if that did not happen. 
- if (instruction->GetLeft() == instruction->GetRight()) { - return; + if (TryCombineMultiplyAccumulate(instruction, kArm64)) { + RecordSimplification(); } +} - HInstruction* left = instruction->GetLeft(); - HInstruction* right = instruction->GetRight(); - if ((right->IsAdd() || right->IsSub()) && - TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) { - return; - } - if ((left->IsAdd() || left->IsSub()) && - TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) { - return; - } +void InstructionSimplifierArm64Visitor::VisitOr(HOr* instruction) { + TryMergeNegatedInput(instruction); } void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) { @@ -359,5 +283,9 @@ void InstructionSimplifierArm64Visitor::VisitUShr(HUShr* instruction) { } } +void InstructionSimplifierArm64Visitor::VisitXor(HXor* instruction) { + TryMergeNegatedInput(instruction); +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index b7f490bb8c..cf8458713f 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -51,18 +51,21 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { return TryMergeIntoShifterOperand(use, bitfield_op, true); } - bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, - HBinaryOperation* input_binop, - HInstruction* input_other); + // For bitwise operations (And/Or/Xor) with a negated input, try to use + // a negated bitwise instruction. + bool TryMergeNegatedInput(HBinaryOperation* op); // HInstruction visitors, sorted alphabetically. + void VisitAnd(HAnd* instruction) OVERRIDE; void VisitArrayGet(HArrayGet* instruction) OVERRIDE; void VisitArraySet(HArraySet* instruction) OVERRIDE; void VisitMul(HMul* instruction) OVERRIDE; + void VisitOr(HOr* instruction) OVERRIDE; void VisitShl(HShl* instruction) OVERRIDE; void VisitShr(HShr* instruction) OVERRIDE; void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; void VisitUShr(HUShr* instruction) OVERRIDE; + void VisitXor(HXor* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc new file mode 100644 index 0000000000..45d196fa6d --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -0,0 +1,189 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "instruction_simplifier_shared.h" + +namespace art { + +namespace { + +bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, + HBinaryOperation* input_binop, + HInstruction* input_other) { + DCHECK(Primitive::IsIntOrLongType(mul->GetType())); + DCHECK(input_binop->IsAdd() || input_binop->IsSub()); + DCHECK_NE(input_binop, input_other); + if (!input_binop->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + // Try to interpret patterns like + // a * (b <+/-> 1) + // as + // (a * b) <+/-> a + HInstruction* input_a = input_other; + HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize. + HInstruction::InstructionKind op_kind; + + if (input_binop->IsAdd()) { + if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) { + // Interpret + // a * (b + 1) + // as + // (a * b) + a + input_b = input_binop->GetLeastConstantLeft(); + op_kind = HInstruction::kAdd; + } + } else { + DCHECK(input_binop->IsSub()); + if (input_binop->GetRight()->IsConstant() && + input_binop->GetRight()->AsConstant()->IsMinusOne()) { + // Interpret + // a * (b - (-1)) + // as + // a + (a * b) + input_b = input_binop->GetLeft(); + op_kind = HInstruction::kAdd; + } else if (input_binop->GetLeft()->IsConstant() && + input_binop->GetLeft()->AsConstant()->IsOne()) { + // Interpret + // a * (1 - b) + // as + // a - (a * b) + input_b = input_binop->GetRight(); + op_kind = HInstruction::kSub; + } + } + + if (input_b == nullptr) { + // We did not find a pattern we can optimize. + return false; + } + + ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); + HMultiplyAccumulate* mulacc = new(arena) HMultiplyAccumulate( + mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc()); + + mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc); + input_binop->GetBlock()->RemoveInstruction(input_binop); + + return true; +} + +} // namespace + +bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) { + Primitive::Type type = mul->GetType(); + switch (isa) { + case kArm: + case kThumb2: + if (type != Primitive::kPrimInt) { + return false; + } + break; + case kArm64: + if (!Primitive::IsIntOrLongType(type)) { + return false; + } + break; + default: + return false; + } + + HInstruction* use = mul->HasNonEnvironmentUses() + ? mul->GetUses().GetFirst()->GetUser() + : nullptr; + + ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); + + if (mul->HasOnlyOneNonEnvironmentUse()) { + if (use->IsAdd() || use->IsSub()) { + // Replace code looking like + // MUL tmp, x, y + // SUB dst, acc, tmp + // with + // MULSUB dst, acc, x, y + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HBinaryOperation* binop = use->AsBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + // Be careful after GVN. This should not happen since the `HMul` has only + // one use. 
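// [Sketch] The arithmetic identities behind the multiply-accumulate patterns
// matched here, checked with illustrative constants (kA/kB are not ART names):
#include <cstdint>
constexpr int64_t kA = 7, kB = 5;
static_assert(kA * (kB + 1) == kA * kB + kA, "a * (b + 1) folds to madd(a*b, a)");
static_assert(kA * (kB - (-1)) == kA + kA * kB, "a * (b - (-1)) folds to madd(a*b, a)");
static_assert(kA * (1 - kB) == kA - kA * kB, "a * (1 - b) folds to msub(a, a*b)");
static_assert(0 - kA * kB == -(kA * kB), "a negated multiply folds to msub with a zero accumulator");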
+ DCHECK_NE(binop_left, binop_right); + if (binop_right == mul) { + accumulator = binop_left; + } else if (use->IsAdd()) { + DCHECK_EQ(binop_left, mul); + accumulator = binop_right; + } + + if (accumulator != nullptr) { + HMultiplyAccumulate* mulacc = + new (arena) HMultiplyAccumulate(type, + binop->GetKind(), + accumulator, + mul->GetLeft(), + mul->GetRight()); + + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!mul->HasUses()); + mul->GetBlock()->RemoveInstruction(mul); + return true; + } + } else if (use->IsNeg() && isa != kArm) { + HMultiplyAccumulate* mulacc = + new (arena) HMultiplyAccumulate(type, + HInstruction::kSub, + mul->GetBlock()->GetGraph()->GetConstant(type, 0), + mul->GetLeft(), + mul->GetRight()); + + use->GetBlock()->ReplaceAndRemoveInstructionWith(use, mulacc); + DCHECK(!mul->HasUses()); + mul->GetBlock()->RemoveInstruction(mul); + return true; + } + } + + // Use multiply accumulate instruction for a few simple patterns. + // We prefer not applying the following transformations if the left and + // right inputs perform the same operation. + // We rely on GVN having squashed the inputs if appropriate. However the + // results are still correct even if that did not happen. + if (mul->GetLeft() == mul->GetRight()) { + return false; + } + + HInstruction* left = mul->GetLeft(); + HInstruction* right = mul->GetRight(); + if ((right->IsAdd() || right->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(mul, right->AsBinaryOperation(), left)) { + return true; + } + if ((left->IsAdd() || left->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(mul, left->AsBinaryOperation(), right)) { + return true; + } + return false; +} + +} // namespace art diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h new file mode 100644 index 0000000000..9832ecc058 --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ +#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ + +#include "nodes.h" + +namespace art { + +bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa); + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index ea8669fa18..8cbdcbbcaf 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1825,6 +1825,90 @@ void IntrinsicCodeGeneratorARM::VisitMathNextAfter(HInvoke* invoke) { GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter); } +void IntrinsicLocationsBuilderARM::VisitIntegerReverse(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitIntegerReverse(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register out = locations->Out().AsRegister<Register>(); + Register in = locations->InAt(0).AsRegister<Register>(); + + __ rbit(out, in); +} + +void IntrinsicLocationsBuilderARM::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARM::VisitLongReverse(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>(); + + __ rbit(out_reg_lo, in_reg_hi); + __ rbit(out_reg_hi, in_reg_lo); +} + +void IntrinsicLocationsBuilderARM::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitIntegerReverseBytes(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register out = locations->Out().AsRegister<Register>(); + Register in = locations->InAt(0).AsRegister<Register>(); + + __ rev(out, in); +} + +void IntrinsicLocationsBuilderARM::VisitLongReverseBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARM::VisitLongReverseBytes(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>(); + + __ rev(out_reg_lo, in_reg_hi); + __ rev(out_reg_hi, in_reg_lo); +} + +void IntrinsicLocationsBuilderARM::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitShortReverseBytes(HInvoke* invoke) { + ArmAssembler* 
assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register out = locations->Out().AsRegister<Register>(); + Register in = locations->InAt(0).AsRegister<Register>(); + + __ revsh(out, in); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1834,12 +1918,7 @@ void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) } UNIMPLEMENTED_INTRINSIC(IntegerBitCount) -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes) UNIMPLEMENTED_INTRINSIC(LongBitCount) -UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(LongReverseBytes) -UNIMPLEMENTED_INTRINSIC(ShortReverseBytes) UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble) UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat) UNIMPLEMENTED_INTRINSIC(MathMaxDoubleDouble) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 8741fd284f..b5f15fe22d 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -99,7 +99,8 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) { // restored! class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit IntrinsicSlowPathARM64(HInvoke* invoke) : invoke_(invoke) { } + explicit IntrinsicSlowPathARM64(HInvoke* invoke) + : SlowPathCodeARM64(invoke), invoke_(invoke) { } void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index c8629644b6..2f183c3a62 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -99,7 +99,7 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorMIPS* codegen) { // restored! 
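// [Sketch] Why the Long reverse intrinsics above can operate on a register
// pair: reversing each 32-bit half and swapping the halves reverses the full
// 64-bit value. ReverseBits32/ReverseBits64 are illustrative helpers, not ART code:
#include <cstdint>

uint32_t ReverseBits32(uint32_t x) {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i) {
    r = (r << 1) | (x & 1u);  // Shift the result left, pull in the next low bit.
    x >>= 1;
  }
  return r;
}

uint64_t ReverseBits64(uint64_t x) {
  // Mirrors "rbit out_reg_lo, in_reg_hi; rbit out_reg_hi, in_reg_lo" above;
  // the rev pair in LongReverseBytes uses the same half-swap structure.
  uint32_t lo = static_cast<uint32_t>(x);
  uint32_t hi = static_cast<uint32_t>(x >> 32);
  return (static_cast<uint64_t>(ReverseBits32(lo)) << 32) | ReverseBits32(hi);
}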
class IntrinsicSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit IntrinsicSlowPathMIPS(HInvoke* invoke) : invoke_(invoke) { } + explicit IntrinsicSlowPathMIPS(HInvoke* invoke) : SlowPathCodeMIPS(invoke), invoke_(invoke) { } void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { CodeGeneratorMIPS* codegen = down_cast<CodeGeneratorMIPS*>(codegen_in); @@ -407,7 +407,7 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { Primitive::kPrimInt, IsR2OrNewer(), IsR6(), - false, + /* reverseBits */ false, GetAssembler()); } @@ -421,7 +421,7 @@ void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) { Primitive::kPrimLong, IsR2OrNewer(), IsR6(), - false, + /* reverseBits */ false, GetAssembler()); } @@ -435,7 +435,7 @@ void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) { Primitive::kPrimShort, IsR2OrNewer(), IsR6(), - false, + /* reverseBits */ false, GetAssembler()); } @@ -475,7 +475,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* in } void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), false, IsR6(), GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ false, IsR6(), GetAssembler()); } // int java.lang.Long.numberOfLeadingZeros(long i) @@ -484,7 +484,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invok } void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), true, IsR6(), GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ true, IsR6(), GetAssembler()); } static void GenNumberOfTrailingZeroes(LocationSummary* locations, @@ -497,7 +497,6 @@ static void GenNumberOfTrailingZeroes(LocationSummary* locations, Register in; if (is64bit) { - MipsLabel done; Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); @@ -588,7 +587,11 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* i } void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), false, IsR6(), IsR2OrNewer(), GetAssembler()); + GenNumberOfTrailingZeroes(invoke->GetLocations(), + /* is64bit */ false, + IsR6(), + IsR2OrNewer(), + GetAssembler()); } // int java.lang.Long.numberOfTrailingZeros(long i) @@ -597,7 +600,11 @@ void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invo } void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), true, IsR6(), IsR2OrNewer(), GetAssembler()); + GenNumberOfTrailingZeroes(invoke->GetLocations(), + /* is64bit */ true, + IsR6(), + IsR2OrNewer(), + GetAssembler()); } enum RotationDirection { @@ -806,7 +813,7 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerReverse(HInvoke* invoke) { Primitive::kPrimInt, IsR2OrNewer(), IsR6(), - true, + /* reverseBits */ true, GetAssembler()); } @@ -820,10 +827,561 @@ void IntrinsicCodeGeneratorMIPS::VisitLongReverse(HInvoke* invoke) { Primitive::kPrimLong, IsR2OrNewer(), IsR6(), - true, + /* reverseBits */ true, GetAssembler()); } +static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, 
Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +static void MathAbsFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + if (is64bit) { + __ AbsD(out, in); + } else { + __ AbsS(out, in); + } +} + +// double java.lang.Math.abs(double) +void IntrinsicLocationsBuilderMIPS::VisitMathAbsDouble(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathAbsDouble(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); +} + +// float java.lang.Math.abs(float) +void IntrinsicLocationsBuilderMIPS::VisitMathAbsFloat(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathAbsFloat(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); +} + +static void GenAbsInteger(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { + if (is64bit) { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + // The comments in this section show the analogous operations which would + // be performed if we had 64-bit registers "in", and "out". + // __ Dsra32(AT, in, 31); + __ Sra(AT, in_hi, 31); + // __ Xor(out, in, AT); + __ Xor(TMP, in_lo, AT); + __ Xor(out_hi, in_hi, AT); + // __ Dsubu(out, out, AT); + __ Subu(out_lo, TMP, AT); + __ Sltu(TMP, out_lo, TMP); + __ Addu(out_hi, out_hi, TMP); + } else { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + } +} + +// int java.lang.Math.abs(int) +void IntrinsicLocationsBuilderMIPS::VisitMathAbsInt(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathAbsInt(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); +} + +// long java.lang.Math.abs(long) +void IntrinsicLocationsBuilderMIPS::VisitMathAbsLong(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathAbsLong(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); +} + +static void GenMinMaxFP(LocationSummary* locations, + bool is_min, + Primitive::Type type, + bool is_R6, + MipsAssembler* assembler) { + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); + + if (is_R6) { + MipsLabel noNaNs; + MipsLabel done; + FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. 
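// [Sketch] The semantic gap handled by the CMP.UN/SEL prologue below:
// min.fmt/max.fmt (like std::fmin/fmax) prefer the number when one input is
// NaN, whereas Java's Math.min/max must return the NaN. JavaMinSketch is an
// illustrative stand-in only; the +/-0.0 handling is omitted here:
#include <cmath>
#include <limits>

double JavaMinSketch(double x, double y) {
  if (x != x || y != y) {                             // Either input is NaN...
    return std::numeric_limits<double>::quiet_NaN();  // ...so the NaN wins.
  }
  return std::fmin(x, y);                             // Otherwise a plain minimum.
}
// (GenAbsInteger above similarly relies on the branchless identity
//  abs(x) == (x ^ (x >> 31)) - (x >> 31) for 32-bit two's-complement ints.)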
+ if (type == Primitive::kPrimDouble) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, Primitive::kPrimFloat); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); + } else { + MipsLabel ordered; + MipsLabel compare; + MipsLabel select; + MipsLabel done; + + if (type == Primitive::kPrimDouble) { + __ CunD(a, b); + } else { + DCHECK_EQ(type, Primitive::kPrimFloat); + __ CunS(a, b); + } + __ Bc1f(&ordered); + + // a or b (or both) is a NaN. Return one, which is a NaN. + if (type == Primitive::kPrimDouble) { + __ CeqD(b, b); + } else { + __ CeqS(b, b); + } + __ B(&select); + + __ Bind(&ordered); + + // Neither is a NaN. + // a == b? (-0.0 compares equal with +0.0) + // If equal, handle zeroes, else compare further. + if (type == Primitive::kPrimDouble) { + __ CeqD(a, b); + } else { + __ CeqS(a, b); + } + __ Bc1f(&compare); + + // a == b either bit for bit or one is -0.0 and the other is +0.0. + if (type == Primitive::kPrimDouble) { + __ MoveFromFpuHigh(TMP, a); + __ MoveFromFpuHigh(AT, b); + } else { + __ Mfc1(TMP, a); + __ Mfc1(AT, b); + } + + if (is_min) { + // -0.0 prevails over +0.0. + __ Or(TMP, TMP, AT); + } else { + // +0.0 prevails over -0.0. + __ And(TMP, TMP, AT); + } + + if (type == Primitive::kPrimDouble) { + __ Mfc1(AT, a); + __ Mtc1(AT, out); + __ MoveToFpuHigh(TMP, out); + } else { + __ Mtc1(TMP, out); + } + __ B(&done); + + __ Bind(&compare); + + if (type == Primitive::kPrimDouble) { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeD(a, b); + } else { + // return (a >= b) ? a : b; + __ ColeD(b, a); // b <= a + } + } else { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeS(a, b); + } else { + // return (a >= b) ? 
a : b; + __ ColeS(b, a); // b <= a + } + } + + __ Bind(&select); + + if (type == Primitive::kPrimDouble) { + __ MovtD(out, a); + __ MovfD(out, b); + } else { + __ MovtS(out, a); + __ MovfS(out, b); + } + + __ Bind(&done); + } +} + +static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); +} + +// double java.lang.Math.min(double, double) +void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), + /* is_min */ true, + Primitive::kPrimDouble, + IsR6(), + GetAssembler()); +} + +// float java.lang.Math.min(float, float) +void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), + /* is_min */ true, + Primitive::kPrimFloat, + IsR6(), + GetAssembler()); +} + +// double java.lang.Math.max(double, double) +void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), + /* is_min */ false, + Primitive::kPrimDouble, + IsR6(), + GetAssembler()); +} + +// float java.lang.Math.max(float, float) +void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), + /* is_min */ false, + Primitive::kPrimFloat, + IsR6(), + GetAssembler()); +} + +static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +static void GenMinMax(LocationSummary* locations, + bool is_min, + Primitive::Type type, + bool is_R6, + MipsAssembler* assembler) { + if (is_R6) { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions + // always change the target (output) register. If the condition is + // true the output register gets the contents of the "rs" register; + // otherwise, the output register is set to zero. One consequence + // of this is that to implement something like "rd = c==0 ? rs : rt" + // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. + // After executing this pair of instructions one of the output + // registers from the pair will necessarily contain zero. Then the + // code ORs the output registers from the SELEQZ/SELNEZ instructions + // to get the final result. 
+ // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. + if (type == Primitive::kPrimLong) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, b_hi, a_hi); + __ Bne(b_hi, a_hi, &compare_done); + + __ Sltu(TMP, b_lo, a_lo); + + __ Bind(&compare_done); + + if (is_min) { + __ Seleqz(AT, a_lo, TMP); + __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo + // because at this point we're + // done using a_lo/b_lo. + } else { + __ Selnez(AT, a_lo, TMP); + __ Seleqz(out_lo, b_lo, TMP); // ditto + } + __ Or(out_lo, out_lo, AT); + if (is_min) { + __ Seleqz(AT, a_hi, TMP); + __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } else { + __ Selnez(AT, a_hi, TMP); + __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } + __ Or(out_hi, out_hi, AT); + } + } else { + DCHECK_EQ(type, Primitive::kPrimInt); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, b, a); + if (is_min) { + __ Seleqz(TMP, a, AT); + __ Selnez(AT, b, AT); + } else { + __ Selnez(TMP, a, AT); + __ Seleqz(AT, b, AT); + } + __ Or(out, TMP, AT); + } + } + } else { + if (type == Primitive::kPrimLong) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, a_hi, b_hi); + __ Bne(a_hi, b_hi, &compare_done); + + __ Sltu(TMP, a_lo, b_lo); + + __ Bind(&compare_done); + + if (is_min) { + if (out_lo != a_lo) { + __ Movn(out_hi, a_hi, TMP); + __ Movn(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movz(out_hi, b_hi, TMP); + __ Movz(out_lo, b_lo, TMP); + } + } else { + if (out_lo != a_lo) { + __ Movz(out_hi, a_hi, TMP); + __ Movz(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movn(out_hi, b_hi, TMP); + __ Movn(out_lo, b_lo, TMP); + } + } + } + } else { + DCHECK_EQ(type, Primitive::kPrimInt); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = 
locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, a, b); + if (is_min) { + if (out != a) { + __ Movn(out, a, AT); + } + if (out != b) { + __ Movz(out, b, AT); + } + } else { + if (out != a) { + __ Movz(out, a, AT); + } + if (out != b) { + __ Movn(out, b, AT); + } + } + } + } + } +} + +// int java.lang.Math.min(int, int) +void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), + /* is_min */ true, + Primitive::kPrimInt, + IsR6(), + GetAssembler()); +} + +// long java.lang.Math.min(long, long) +void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), + /* is_min */ true, + Primitive::kPrimLong, + IsR6(), + GetAssembler()); +} + +// int java.lang.Math.max(int, int) +void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), + /* is_min */ false, + Primitive::kPrimInt, + IsR6(), + GetAssembler()); +} + +// long java.lang.Math.max(long, long) +void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), + /* is_min */ false, + Primitive::kPrimLong, + IsR6(), + GetAssembler()); +} + +// double java.lang.Math.sqrt(double) +void IntrinsicLocationsBuilderMIPS::VisitMathSqrt(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathSqrt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + MipsAssembler* assembler = GetAssembler(); + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + __ SqrtD(out, in); +} + // byte libcore.io.Memory.peekByte(long address) void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekByte(HInvoke* invoke) { CreateIntToIntLocations(arena_, invoke); @@ -1151,19 +1709,6 @@ void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(IntegerBitCount) UNIMPLEMENTED_INTRINSIC(LongBitCount) -UNIMPLEMENTED_INTRINSIC(MathAbsDouble) -UNIMPLEMENTED_INTRINSIC(MathAbsFloat) -UNIMPLEMENTED_INTRINSIC(MathAbsInt) -UNIMPLEMENTED_INTRINSIC(MathAbsLong) -UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble) -UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat) -UNIMPLEMENTED_INTRINSIC(MathMaxDoubleDouble) -UNIMPLEMENTED_INTRINSIC(MathMaxFloatFloat) -UNIMPLEMENTED_INTRINSIC(MathMinIntInt) -UNIMPLEMENTED_INTRINSIC(MathMinLongLong) -UNIMPLEMENTED_INTRINSIC(MathMaxIntInt) -UNIMPLEMENTED_INTRINSIC(MathMaxLongLong) -UNIMPLEMENTED_INTRINSIC(MathSqrt) UNIMPLEMENTED_INTRINSIC(MathCeil) UNIMPLEMENTED_INTRINSIC(MathFloor) UNIMPLEMENTED_INTRINSIC(MathRint) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index cf3a3657de..bd4f5329da 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -87,7 +87,8 @@ static void 
MoveArguments(HInvoke* invoke, CodeGeneratorMIPS64* codegen) { // restored! class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit IntrinsicSlowPathMIPS64(HInvoke* invoke) : invoke_(invoke) { } + explicit IntrinsicSlowPathMIPS64(HInvoke* invoke) + : SlowPathCodeMIPS64(invoke), invoke_(invoke) { } void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { CodeGeneratorMIPS64* codegen = down_cast<CodeGeneratorMIPS64*>(codegen_in); @@ -580,25 +581,71 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) { static void GenMinMaxFP(LocationSummary* locations, bool is_min, - bool is_double, + Primitive::Type type, Mips64Assembler* assembler) { - FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - if (is_double) { + Mips64Label noNaNs; + Mips64Label done; + FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. + if (type == Primitive::kPrimDouble) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + if (is_min) { - __ MinD(out, lhs, rhs); + __ MinD(out, a, b); } else { - __ MaxD(out, lhs, rhs); + __ MaxD(out, a, b); } } else { + DCHECK_EQ(type, Primitive::kPrimFloat); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. 
+ __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + if (is_min) { - __ MinS(out, lhs, rhs); + __ MinS(out, a, b); } else { - __ MaxS(out, lhs, rhs); + __ MaxS(out, a, b); } } + + __ Bind(&done); } static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -616,7 +663,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, Primitive::kPrimDouble, GetAssembler()); } // float java.lang.Math.min(float, float) @@ -625,7 +672,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, Primitive::kPrimFloat, GetAssembler()); } // double java.lang.Math.max(double, double) @@ -634,7 +681,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, Primitive::kPrimDouble, GetAssembler()); } // float java.lang.Math.max(float, float) @@ -643,7 +690,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, Primitive::kPrimFloat, GetAssembler()); } static void GenMinMax(LocationSummary* locations, @@ -653,49 +700,55 @@ static void GenMinMax(LocationSummary* locations, GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always - // change the target (output) register. If the condition is true the - // output register gets the contents of the "rs" register; otherwise, - // the output register is set to zero. One consequence of this is - // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 - // needs to use a pair of SELEQZ/SELNEZ instructions. After - // executing this pair of instructions one of the output registers - // from the pair will necessarily contain zero. Then the code ORs the - // output registers from the SELEQZ/SELNEZ instructions to get the - // final result. - // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. 
The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (out == lhs) { - __ Slt(AT, rhs, lhs); - if (is_min) { - __ Seleqz(out, lhs, AT); - __ Selnez(AT, rhs, AT); - } else { - __ Selnez(out, lhs, AT); - __ Seleqz(AT, rhs, AT); + if (lhs == rhs) { + if (out != lhs) { + __ Move(out, lhs); } } else { - __ Slt(AT, lhs, rhs); - if (is_min) { - __ Seleqz(out, rhs, AT); - __ Selnez(AT, lhs, AT); + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always + // change the target (output) register. If the condition is true the + // output register gets the contents of the "rs" register; otherwise, + // the output register is set to zero. One consequence of this is + // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 + // needs to use a pair of SELEQZ/SELNEZ instructions. After + // executing this pair of instructions one of the output registers + // from the pair will necessarily contain zero. Then the code ORs the + // output registers from the SELEQZ/SELNEZ instructions to get the + // final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. 
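// [Sketch] Reference semantics of the R6 select pair described above; the
// function names are illustrative, not assembler API:
#include <cstdint>

uint32_t Seleqz(uint32_t rs, uint32_t rt) { return rt == 0 ? rs : 0; }  // rd = (rt == 0) ? rs : 0
uint32_t Selnez(uint32_t rs, uint32_t rt) { return rt != 0 ? rs : 0; }  // rd = (rt != 0) ? rs : 0

// "rd = (c == 0) ? rs : rt" therefore takes both selects plus an OR, because
// exactly one of the two partial results is guaranteed to be zero:
uint32_t SelectOnZero(uint32_t c, uint32_t rs, uint32_t rt) {
  return Seleqz(rs, c) | Selnez(rt, c);
}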
+ if (out == lhs) { + __ Slt(AT, rhs, lhs); + if (is_min) { + __ Seleqz(out, lhs, AT); + __ Selnez(AT, rhs, AT); + } else { + __ Selnez(out, lhs, AT); + __ Seleqz(AT, rhs, AT); + } } else { - __ Selnez(out, rhs, AT); - __ Seleqz(AT, lhs, AT); + __ Slt(AT, lhs, rhs); + if (is_min) { + __ Seleqz(out, rhs, AT); + __ Selnez(AT, lhs, AT); + } else { + __ Selnez(out, rhs, AT); + __ Seleqz(AT, lhs, AT); + } } + __ Or(out, out, AT); } - __ Or(out, out, AT); } static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h index e70afd29f0..c1f9ae6425 100644 --- a/compiler/optimizing/intrinsics_utils.h +++ b/compiler/optimizing/intrinsics_utils.h @@ -39,7 +39,7 @@ namespace art { template <typename TDexCallingConvention> class IntrinsicSlowPath : public SlowPathCode { public: - explicit IntrinsicSlowPath(HInvoke* invoke) : invoke_(invoke) { } + explicit IntrinsicSlowPath(HInvoke* invoke) : SlowPathCode(invoke), invoke_(invoke) { } Location MoveArguments(CodeGenerator* codegen) { TDexCallingConvention calling_convention_visitor; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index f36dc6e2fd..f9acb089ee 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1178,19 +1178,19 @@ HConstant* HUnaryOperation::TryStaticEvaluation() const { } HConstant* HBinaryOperation::TryStaticEvaluation() const { - if (GetLeft()->IsIntConstant()) { - if (GetRight()->IsIntConstant()) { - return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsIntConstant()); - } else if (GetRight()->IsLongConstant()) { - return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsLongConstant()); - } + if (GetLeft()->IsIntConstant() && GetRight()->IsIntConstant()) { + return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsIntConstant()); } else if (GetLeft()->IsLongConstant()) { if (GetRight()->IsIntConstant()) { + // The binop(long, int) case is only valid for shifts and rotations. + DCHECK(IsShl() || IsShr() || IsUShr() || IsRor()) << DebugName(); return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsIntConstant()); } else if (GetRight()->IsLongConstant()) { return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsLongConstant()); } } else if (GetLeft()->IsNullConstant() && GetRight()->IsNullConstant()) { + // The binop(null, null) case is only valid for equal and not-equal conditions. + DCHECK(IsEqual() || IsNotEqual()) << DebugName(); return Evaluate(GetLeft()->AsNullConstant(), GetRight()->AsNullConstant()); } else if (kEnableFloatingPointStaticEvaluation) { if (GetLeft()->IsFloatConstant() && GetRight()->IsFloatConstant()) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 399afabea6..c4764ccbb4 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1247,6 +1247,16 @@ class HLoopInformationOutwardIterator : public ValueObject { M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ +/* + * Instructions, shared across several (not all) architectures. 
+ */ +#if !defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_ENABLE_CODEGEN_arm64) +#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) +#else +#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \ + M(MultiplyAccumulate, Instruction) +#endif + #ifndef ART_ENABLE_CODEGEN_arm #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) #else @@ -1258,9 +1268,9 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) #else #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ + M(Arm64BitwiseNegatedRight, Instruction) \ M(Arm64DataProcWithShifterOp, Instruction) \ - M(Arm64IntermediateAddress, Instruction) \ - M(Arm64MultiplyAccumulate, Instruction) + M(Arm64IntermediateAddress, Instruction) #endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) @@ -1281,6 +1291,7 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION(M) \ FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \ FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \ FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ @@ -2821,20 +2832,15 @@ class HBinaryOperation : public HExpression<2> { // Apply this operation to `x` and `y`. virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, HNullConstant* y ATTRIBUTE_UNUSED) const { - VLOG(compiler) << DebugName() << " is not defined for the (null, null) case."; - return nullptr; + LOG(FATAL) << DebugName() << " is not defined for the (null, null) case."; + UNREACHABLE(); } virtual HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const = 0; virtual HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const = 0; - virtual HConstant* Evaluate(HIntConstant* x ATTRIBUTE_UNUSED, - HLongConstant* y ATTRIBUTE_UNUSED) const { - VLOG(compiler) << DebugName() << " is not defined for the (int, long) case."; - return nullptr; - } virtual HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED, HIntConstant* y ATTRIBUTE_UNUSED) const { - VLOG(compiler) << DebugName() << " is not defined for the (long, int) case."; - return nullptr; + LOG(FATAL) << DebugName() << " is not defined for the (long, int) case."; + UNREACHABLE(); } virtual HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const = 0; virtual HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const = 0; @@ -4305,8 +4311,6 @@ class HShl : public HBinaryOperation { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc()); } - // There is no `Evaluate(HIntConstant* x, HLongConstant* y)`, as this - // case is handled as `x << static_cast<int>(y)`. HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc()); @@ -4351,8 +4355,6 @@ class HShr : public HBinaryOperation { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc()); } - // There is no `Evaluate(HIntConstant* x, HLongConstant* y)`, as this - // case is handled as `x >> static_cast<int>(y)`. 
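// [Sketch] The (long, int) Evaluate overloads kept above rely on Java's rule
// that only the low bits of the shift distance matter (assuming
// kMaxIntShiftValue/kMaxLongShiftValue are the usual 0x1f/0x3f masks);
// the values below are illustrative only:
#include <cstdint>

constexpr int64_t ShlLongSketch(int64_t value, int32_t distance) {
  return value << (distance & 0x3f);  // Java: a long shift uses distance & 63.
}
static_assert(ShlLongSketch(1, 65) == 2, "65 & 63 == 1");
static_assert(ShlLongSketch(1, 64) == 1, "64 & 63 == 0");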
   HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
@@ -4398,8 +4400,6 @@ class HUShr : public HBinaryOperation {
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc());
   }
-  // There is no `Evaluate(HIntConstant* x, HLongConstant* y)`, as this
-  // case is handled as `x >>> static_cast<int>(y)`.
   HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
@@ -4435,21 +4435,12 @@ class HAnd : public HBinaryOperation {
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T, typename U>
-  auto Compute(T x, U y) const -> decltype(x & y) { return x & y; }
+  template <typename T> T Compute(T x, T y) const { return x & y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
-  HConstant* Evaluate(HIntConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
-  HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
@@ -4481,21 +4472,12 @@ class HOr : public HBinaryOperation {
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T, typename U>
-  auto Compute(T x, U y) const -> decltype(x | y) { return x | y; }
+  template <typename T> T Compute(T x, T y) const { return x | y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
-  HConstant* Evaluate(HIntConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
-  HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
@@ -4527,21 +4509,12 @@ class HXor : public HBinaryOperation {
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T, typename U>
-  auto Compute(T x, U y) const -> decltype(x ^ y) { return x ^ y; }
+  template <typename T> T Compute(T x, T y) const { return x ^ y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
-  HConstant* Evaluate(HIntConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
-  HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
@@ -6060,6 +6033,9 @@ class HParallelMove : public HTemplateInstruction<0> {
 
 } // namespace art
 
+#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
+#include "nodes_shared.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm
 #include "nodes_arm.h"
 #endif
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 445cdab191..75a71e78b8 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -118,38 +118,64 @@ class HArm64IntermediateAddress : public HExpression<2> {
   DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
 };
 
-class HArm64MultiplyAccumulate : public HExpression<3> {
+class HArm64BitwiseNegatedRight : public HBinaryOperation {
  public:
-  HArm64MultiplyAccumulate(Primitive::Type type,
-                           InstructionKind op,
-                           HInstruction* accumulator,
-                           HInstruction* mul_left,
-                           HInstruction* mul_right,
-                           uint32_t dex_pc = kNoDexPc)
-      : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
-    SetRawInputAt(kInputAccumulatorIndex, accumulator);
-    SetRawInputAt(kInputMulLeftIndex, mul_left);
-    SetRawInputAt(kInputMulRightIndex, mul_right);
+  HArm64BitwiseNegatedRight(Primitive::Type result_type,
+                            InstructionKind op,
+                            HInstruction* left,
+                            HInstruction* right,
+                            uint32_t dex_pc = kNoDexPc)
+      : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc),
+        op_kind_(op) {
+    DCHECK(op == HInstruction::kAnd || op == HInstruction::kOr || op == HInstruction::kXor) << op;
   }
 
-  static constexpr int kInputAccumulatorIndex = 0;
-  static constexpr int kInputMulLeftIndex = 1;
-  static constexpr int kInputMulRightIndex = 2;
+  template <typename T, typename U>
+  auto Compute(T x, U y) const -> decltype(x & ~y) {
+    static_assert(std::is_same<decltype(x & ~y), decltype(x | ~y)>::value &&
+                  std::is_same<decltype(x & ~y), decltype(x ^ ~y)>::value,
+                  "Inconsistent negated bitwise types");
+    switch (op_kind_) {
+      case HInstruction::kAnd:
+        return x & ~y;
+      case HInstruction::kOr:
+        return x | ~y;
+      case HInstruction::kXor:
+        return x ^ ~y;
+      default:
+        LOG(FATAL) << "Unreachable";
+        UNREACHABLE();
+    }
+  }
 
-  bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_;
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetLongConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   InstructionKind GetOpKind() const { return op_kind_; }
 
-  DECLARE_INSTRUCTION(Arm64MultiplyAccumulate);
+  DECLARE_INSTRUCTION(Arm64BitwiseNegatedRight);
 
  private:
-  // Indicates if this is a MADD or MSUB.
-  InstructionKind op_kind_;
+  // Specifies the bitwise operation, which will be then negated.
+  const InstructionKind op_kind_;
 
-  DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate);
+  DISALLOW_COPY_AND_ASSIGN(HArm64BitwiseNegatedRight);
 };
 
 } // namespace art
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
new file mode 100644
index 0000000000..b04b622838
--- /dev/null
+++ b/compiler/optimizing/nodes_shared.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
+#define ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
+
+namespace art {
+
+class HMultiplyAccumulate : public HExpression<3> {
+ public:
+  HMultiplyAccumulate(Primitive::Type type,
+                      InstructionKind op,
+                      HInstruction* accumulator,
+                      HInstruction* mul_left,
+                      HInstruction* mul_right,
+                      uint32_t dex_pc = kNoDexPc)
+      : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
+    SetRawInputAt(kInputAccumulatorIndex, accumulator);
+    SetRawInputAt(kInputMulLeftIndex, mul_left);
+    SetRawInputAt(kInputMulRightIndex, mul_right);
+  }
+
+  static constexpr int kInputAccumulatorIndex = 0;
+  static constexpr int kInputMulLeftIndex = 1;
+  static constexpr int kInputMulRightIndex = 2;
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    return op_kind_ == other->AsMultiplyAccumulate()->op_kind_;
+  }
+
+  InstructionKind GetOpKind() const { return op_kind_; }
+
+  DECLARE_INSTRUCTION(MultiplyAccumulate);
+
+ private:
+  // Indicates if this is a MADD or MSUB.
+  const InstructionKind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HMultiplyAccumulate);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b1891c979e..5a9f2583fd 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -60,6 +60,7 @@
 #include "induction_var_analysis.h"
 #include "inliner.h"
 #include "instruction_simplifier.h"
+#include "instruction_simplifier_arm.h"
 #include "intrinsics.h"
 #include "jit/debugger_interface.h"
 #include "jit/jit_code_cache.h"
@@ -438,7 +439,10 @@ static void RunArchOptimizations(InstructionSet instruction_set,
     case kThumb2:
     case kArm: {
       arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
+      arm::InstructionSimplifierArm* simplifier =
+          new (arena) arm::InstructionSimplifierArm(graph, stats);
       HOptimization* arm_optimizations[] = {
+        simplifier,
         fixups
       };
       RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 4784de1380..54cbdf8b66 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -63,8 +63,7 @@ void StackMapStream::EndStackMapEntry() {
 void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) {
   if (kind != DexRegisterLocation::Kind::kNone) {
     // Ensure we only use non-compressed location kind at this stage.
-    DCHECK(DexRegisterLocation::IsShortLocationKind(kind))
-        << DexRegisterLocation::PrettyDescriptor(kind);
+    DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) << kind;
     DexRegisterLocation location(kind, value);
 
     // Look for Dex register `location` in the location catalog (using the
@@ -257,6 +256,7 @@ void StackMapStream::FillIn(MemoryRegion region) {
   // Ensure we reached the end of the Dex registers location_catalog.
   DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size());
 
+  ArenaBitVector empty_bitmask(allocator_, 0, /* expandable */ false);
   uintptr_t next_dex_register_map_offset = 0;
   uintptr_t next_inline_info_offset = 0;
   for (size_t i = 0, e = stack_maps_.size(); i < e; ++i) {
@@ -268,6 +268,9 @@ void StackMapStream::FillIn(MemoryRegion region) {
     stack_map.SetRegisterMask(stack_map_encoding_, entry.register_mask);
     if (entry.sp_mask != nullptr) {
       stack_map.SetStackMask(stack_map_encoding_, *entry.sp_mask);
+    } else {
+      // The MemoryRegion does not have to be zeroed, so make sure we clear the bits.
+      stack_map.SetStackMask(stack_map_encoding_, empty_bitmask);
     }
 
     if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
@@ -344,6 +347,11 @@ void StackMapStream::FillIn(MemoryRegion region) {
       }
     }
   }
+
+  // Verify all written data in debug build.
+  if (kIsDebugBuild) {
+    CheckCodeInfo(region);
+  }
 }
 
 void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map,
@@ -423,4 +431,90 @@ bool StackMapStream::HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEn
   return true;
 }
 
+// Helper for CheckCodeInfo - check that register map has the expected content.
+void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info,
+                                         const DexRegisterMap& dex_register_map,
+                                         size_t num_dex_registers,
+                                         BitVector* live_dex_registers_mask,
+                                         size_t dex_register_locations_index) const {
+  StackMapEncoding encoding = code_info.ExtractEncoding();
+  for (size_t reg = 0; reg < num_dex_registers; reg++) {
+    // Find the location we tried to encode.
+    DexRegisterLocation expected = DexRegisterLocation::None();
+    if (live_dex_registers_mask->IsBitSet(reg)) {
+      size_t catalog_index = dex_register_locations_[dex_register_locations_index++];
+      expected = location_catalog_entries_[catalog_index];
+    }
+    // Compare to the seen location.
+    if (expected.GetKind() == DexRegisterLocation::Kind::kNone) {
+      DCHECK(!dex_register_map.IsValid() || !dex_register_map.IsDexRegisterLive(reg));
+    } else {
+      DCHECK(dex_register_map.IsDexRegisterLive(reg));
+      DexRegisterLocation seen = dex_register_map.GetDexRegisterLocation(
+          reg, num_dex_registers, code_info, encoding);
+      DCHECK_EQ(expected.GetKind(), seen.GetKind());
+      DCHECK_EQ(expected.GetValue(), seen.GetValue());
+    }
+  }
+  if (num_dex_registers == 0) {
+    DCHECK(!dex_register_map.IsValid());
+  }
+}
+
+// Check that all StackMapStream inputs are correctly encoded by trying to read them back.
+void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
+  CodeInfo code_info(region);
+  StackMapEncoding encoding = code_info.ExtractEncoding();
+  DCHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size());
+  for (size_t s = 0; s < stack_maps_.size(); ++s) {
+    const StackMap stack_map = code_info.GetStackMapAt(s, encoding);
+    StackMapEntry entry = stack_maps_[s];
+
+    // Check main stack map fields.
+    DCHECK_EQ(stack_map.GetNativePcOffset(encoding), entry.native_pc_offset);
+    DCHECK_EQ(stack_map.GetDexPc(encoding), entry.dex_pc);
+    DCHECK_EQ(stack_map.GetRegisterMask(encoding), entry.register_mask);
+    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
+    if (entry.sp_mask != nullptr) {
+      DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits());
+      for (size_t b = 0; b < stack_mask.size_in_bits(); b++) {
+        DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b));
+      }
+    } else {
+      for (size_t b = 0; b < stack_mask.size_in_bits(); b++) {
+        DCHECK_EQ(stack_mask.LoadBit(b), 0u);
+      }
+    }
+
+    CheckDexRegisterMap(code_info,
+                        code_info.GetDexRegisterMapOf(
+                            stack_map, encoding, entry.num_dex_registers),
+                        entry.num_dex_registers,
+                        entry.live_dex_registers_mask,
+                        entry.dex_register_locations_start_index);
+
+    // Check inline info.
+    DCHECK_EQ(stack_map.HasInlineInfo(encoding), (entry.inlining_depth != 0));
+    if (entry.inlining_depth != 0) {
+      InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
+      DCHECK_EQ(inline_info.GetDepth(), entry.inlining_depth);
+      for (size_t d = 0; d < entry.inlining_depth; ++d) {
+        size_t inline_info_index = entry.inline_infos_start_index + d;
+        DCHECK_LT(inline_info_index, inline_infos_.size());
+        InlineInfoEntry inline_entry = inline_infos_[inline_info_index];
+        DCHECK_EQ(inline_info.GetDexPcAtDepth(d), inline_entry.dex_pc);
+        DCHECK_EQ(inline_info.GetMethodIndexAtDepth(d), inline_entry.method_index);
+        DCHECK_EQ(inline_info.GetInvokeTypeAtDepth(d), inline_entry.invoke_type);
+
+        CheckDexRegisterMap(code_info,
+                            code_info.GetDexRegisterMapAtDepth(
+                                d, inline_info, encoding, inline_entry.num_dex_registers),
+                            inline_entry.num_dex_registers,
+                            inline_entry.live_dex_registers_mask,
+                            inline_entry.dex_register_locations_start_index);
+      }
+    }
+  }
+}
+
 } // namespace art
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index fc27a2b446..016a911424 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -167,6 +167,13 @@ class StackMapStream : public ValueObject {
                             const BitVector& live_dex_registers_mask,
                             uint32_t start_index_in_dex_register_locations) const;
 
+  void CheckDexRegisterMap(const CodeInfo& code_info,
+                           const DexRegisterMap& dex_register_map,
+                           size_t num_dex_registers,
+                           BitVector* live_dex_registers_mask,
+                           size_t dex_register_locations_index) const;
+  void CheckCodeInfo(MemoryRegion region) const;
+
   ArenaAllocator* allocator_;
   ArenaVector<StackMapEntry> stack_maps_;
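Editor's note on the negated-right bitwise node introduced in nodes_arm64.h above: the following standalone C++ sketch is illustrative only and is not part of the patch or of ART; the file name, the Kind enum and ComputeNegatedRight are made-up names. It mirrors the constant-folding semantics of HArm64BitwiseNegatedRight::Compute, where the right operand is bitwise-complemented before the chosen AND/OR/XOR is applied, which is presumably what lets the ARM64 backend select single instructions such as BIC, ORN and EON for these patterns.

// negated_right_demo.cc -- illustrative sketch, independent of the ART sources.
#include <cstdint>
#include <iostream>

enum class Kind { kAnd, kOr, kXor };

// Complement the right operand, then apply the selected bitwise operation.
// This matches the folding performed by the new node's Compute helper.
template <typename T>
T ComputeNegatedRight(Kind op, T x, T y) {
  switch (op) {
    case Kind::kAnd: return x & ~y;   // cf. AArch64 BIC
    case Kind::kOr:  return x | ~y;   // cf. AArch64 ORN
    case Kind::kXor: return x ^ ~y;   // cf. AArch64 EON
  }
  return T();  // not reached for valid 'op' values
}

int main() {
  const uint32_t x = 0xF0F0F0F0u;
  const uint32_t y = 0x0FF00FF0u;  // ~y == 0xF00FF00F
  std::cout << std::hex
            << ComputeNegatedRight(Kind::kAnd, x, y) << "\n"   // prints f000f000
            << ComputeNegatedRight(Kind::kOr,  x, y) << "\n"   // prints f0fff0ff
            << ComputeNegatedRight(Kind::kXor, x, y) << "\n";  // prints ff00ff (0x00FF00FF)
  return 0;
}

As in the node itself, only the integral cases are meaningful; the patch deliberately leaves float and double evaluation undefined (LOG(FATAL) in the corresponding Evaluate overloads).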