ART: Refactor intrinsics slow-paths

Refactor slow paths so that the common cases share a default
implementation (only arm64, which uses vixl, is special). Write a
generic intrinsic slow path that the architecture-specific backends
can reuse. Move helper functions into CodeGenerator so that the
generic slow path can call them through the base class.
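
The net effect in each backend: the per-architecture intrinsic
slow-path classes (IntrinsicSlowPathARM, IntrinsicSlowPathX86, ...)
collapse into one template parameterized by the backend's
calling-convention visitor. A condensed sketch of the resulting
pattern, using the names introduced in this patch:

  // intrinsics_utils.h: one slow path for all backends.
  template <typename TDexCallingConvention>
  class IntrinsicSlowPath : public SlowPathCode { /* see diff below */ };

  // Each backend then only needs an alias:
  using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>;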

Change-Id: Ibd788dce432601c6a9f7e6f13eab31f28dcb8550
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index a54dbf1..a1c6db0 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -30,6 +30,7 @@
 #include "nodes.h"
 #include "optimizing_compiler_stats.h"
 #include "stack_map_stream.h"
+#include "utils/label.h"
 
 namespace art {
 
@@ -105,6 +106,9 @@
 
   virtual const char* GetDescription() const = 0;
 
+  Label* GetEntryLabel() { return &entry_label_; }
+  Label* GetExitLabel() { return &exit_label_; }
+
  protected:
   static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
   static constexpr uint32_t kRegisterNotSaved = -1;
@@ -112,6 +116,9 @@
   uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
 
  private:
+  Label entry_label_;
+  Label exit_label_;
+
   DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
 };
 
@@ -386,6 +393,14 @@
                              uint32_t dex_pc,
                              SlowPathCode* slow_path) = 0;
 
+  // Generate a call to a static or direct method.
+  virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
+  // Generate a call to a virtual method.
+  virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;
+
+  // Copy the result of a call into the given target.
+  virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;
+
  protected:
   // Method patch info used for recording locations of required linker patches and
   // target methods. The target method can be used for various purposes, whether for
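
Note: with entry_label_/exit_label_ hoisted into SlowPathCode, a
backend-agnostic slow path can bind its labels through the base
Assembler interface instead of down_cast-ing to a concrete backend.
A minimal sketch (MySlowPath is a hypothetical subclass; Bind/Jump
are the Assembler calls used by the new intrinsics_utils.h below):

  class MySlowPath : public SlowPathCode {
   public:
    void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
      Assembler* assembler = codegen->GetAssembler();
      assembler->Bind(GetEntryLabel());  // Entry label now lives in the base class.
      // ... emit the runtime call ...
      assembler->Jump(GetExitLabel());   // Resume the fast-path code.
    }
    const char* GetDescription() const OVERRIDE { return "MySlowPath"; }
  };
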
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 6f89293..55c9214 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,7 +59,7 @@
 #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
 
-class NullCheckSlowPathARM : public SlowPathCodeARM {
+class NullCheckSlowPathARM : public SlowPathCode {
  public:
   explicit NullCheckSlowPathARM(HNullCheck* instruction) : instruction_(instruction) {}
 
@@ -83,7 +83,7 @@
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM);
 };
 
-class DivZeroCheckSlowPathARM : public SlowPathCodeARM {
+class DivZeroCheckSlowPathARM : public SlowPathCode {
  public:
   explicit DivZeroCheckSlowPathARM(HDivZeroCheck* instruction) : instruction_(instruction) {}
 
@@ -107,7 +107,7 @@
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM);
 };
 
-class SuspendCheckSlowPathARM : public SlowPathCodeARM {
+class SuspendCheckSlowPathARM : public SlowPathCode {
  public:
   SuspendCheckSlowPathARM(HSuspendCheck* instruction, HBasicBlock* successor)
       : instruction_(instruction), successor_(successor) {}
@@ -148,7 +148,7 @@
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM);
 };
 
-class BoundsCheckSlowPathARM : public SlowPathCodeARM {
+class BoundsCheckSlowPathARM : public SlowPathCode {
  public:
   explicit BoundsCheckSlowPathARM(HBoundsCheck* instruction)
       : instruction_(instruction) {}
@@ -186,7 +186,7 @@
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM);
 };
 
-class LoadClassSlowPathARM : public SlowPathCodeARM {
+class LoadClassSlowPathARM : public SlowPathCode {
  public:
   LoadClassSlowPathARM(HLoadClass* cls,
                        HInstruction* at,
@@ -239,7 +239,7 @@
   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM);
 };
 
-class LoadStringSlowPathARM : public SlowPathCodeARM {
+class LoadStringSlowPathARM : public SlowPathCode {
  public:
   explicit LoadStringSlowPathARM(HLoadString* instruction) : instruction_(instruction) {}
 
@@ -269,7 +269,7 @@
   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM);
 };
 
-class TypeCheckSlowPathARM : public SlowPathCodeARM {
+class TypeCheckSlowPathARM : public SlowPathCode {
  public:
   explicit TypeCheckSlowPathARM(HInstruction* instruction) : instruction_(instruction) {}
 
@@ -321,7 +321,7 @@
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM);
 };
 
-class DeoptimizationSlowPathARM : public SlowPathCodeARM {
+class DeoptimizationSlowPathARM : public SlowPathCode {
  public:
   explicit DeoptimizationSlowPathARM(HInstruction* instruction)
     : instruction_(instruction) {}
@@ -1261,7 +1261,7 @@
 }
 
 void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCodeARM* slow_path = new (GetGraph()->GetArena())
+  SlowPathCode* slow_path = new (GetGraph()->GetArena())
       DeoptimizationSlowPathARM(deoptimize);
   codegen_->AddSlowPath(slow_path);
   Label* slow_path_entry = slow_path->GetEntryLabel();
@@ -2792,7 +2792,7 @@
 }
 
 void InstructionCodeGeneratorARM::VisitDivZeroCheck(HDivZeroCheck* instruction) {
-  SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM(instruction);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM(instruction);
   codegen_->AddSlowPath(slow_path);
 
   LocationSummary* locations = instruction->GetLocations();
@@ -3558,7 +3558,7 @@
 }
 
 void InstructionCodeGeneratorARM::GenerateExplicitNullCheck(HNullCheck* instruction) {
-  SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM(instruction);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM(instruction);
   codegen_->AddSlowPath(slow_path);
 
   LocationSummary* locations = instruction->GetLocations();
@@ -3920,7 +3920,7 @@
 
 void InstructionCodeGeneratorARM::VisitBoundsCheck(HBoundsCheck* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  SlowPathCodeARM* slow_path =
+  SlowPathCode* slow_path =
       new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
   codegen_->AddSlowPath(slow_path);
 
@@ -4256,7 +4256,7 @@
     __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
     // TODO: We will need a read barrier here.
 
-    SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
     codegen_->AddSlowPath(slow_path);
     __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
@@ -4279,7 +4279,7 @@
 
 void InstructionCodeGeneratorARM::VisitClinitCheck(HClinitCheck* check) {
   // We assume the class is not null.
-  SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
       check->GetLoadClass(), check, check->GetDexPc(), true);
   codegen_->AddSlowPath(slow_path);
   GenerateClassInitializationCheck(slow_path,
@@ -4287,7 +4287,7 @@
 }
 
 void InstructionCodeGeneratorARM::GenerateClassInitializationCheck(
-    SlowPathCodeARM* slow_path, Register class_reg) {
+    SlowPathCode* slow_path, Register class_reg) {
   __ LoadFromOffset(kLoadWord, IP, class_reg, mirror::Class::StatusOffset().Int32Value());
   __ cmp(IP, ShifterOperand(mirror::Class::kStatusInitialized));
   __ b(slow_path->GetEntryLabel(), LT);
@@ -4305,7 +4305,7 @@
 }
 
 void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
-  SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
   codegen_->AddSlowPath(slow_path);
 
   LocationSummary* locations = load->GetLocations();
@@ -4375,7 +4375,7 @@
   Register out = locations->Out().AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Label done, zero;
-  SlowPathCodeARM* slow_path = nullptr;
+  SlowPathCode* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
   // avoid null check if we know obj is not null.
@@ -4428,8 +4428,7 @@
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
-  SlowPathCodeARM* slow_path =
-      new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction);
   codegen_->AddSlowPath(slow_path);
 
   // avoid null check if we know obj is not null.
@@ -4732,6 +4731,39 @@
   // Will be generated at use site.
 }
 
+void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
+  if (!trg.IsValid()) {
+    DCHECK(type == Primitive::kPrimVoid);
+    return;
+  }
+
+  DCHECK_NE(type, Primitive::kPrimVoid);
+
+  Location return_loc = InvokeDexCallingConventionVisitorARM().GetReturnLocation(type);
+  if (return_loc.Equals(trg)) {
+    return;
+  }
+
+  // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
+  //       with the last branch.
+  if (type == Primitive::kPrimLong) {
+    HParallelMove parallel_move(GetGraph()->GetArena());
+    parallel_move.AddMove(return_loc.ToLow(), trg.ToLow(), Primitive::kPrimInt, nullptr);
+    parallel_move.AddMove(return_loc.ToHigh(), trg.ToHigh(), Primitive::kPrimInt, nullptr);
+    GetMoveResolver()->EmitNativeCode(&parallel_move);
+  } else if (type == Primitive::kPrimDouble) {
+    HParallelMove parallel_move(GetGraph()->GetArena());
+    parallel_move.AddMove(return_loc.ToLow(), trg.ToLow(), Primitive::kPrimFloat, nullptr);
+    parallel_move.AddMove(return_loc.ToHigh(), trg.ToHigh(), Primitive::kPrimFloat, nullptr);
+    GetMoveResolver()->EmitNativeCode(&parallel_move);
+  } else {
+    // Let the parallel move resolver take care of all of this.
+    HParallelMove parallel_move(GetGraph()->GetArena());
+    parallel_move.AddMove(return_loc, trg, type, nullptr);
+    GetMoveResolver()->EmitNativeCode(&parallel_move);
+  }
+}
+
 #undef __
 #undef QUICK_ENTRY_POINT
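
Note: the pair-splitting above exists because the parallel move
resolver does not yet understand register pairs (hence the TODO).
Concretely, on ARM a kPrimLong return arrives in R0/R1, so moving it
into a (hypothetical) target pair R2/R3 is expressed as two
independent word moves inside the code generator:

  // Hypothetical target pair R2/R3 for the long return value in R0/R1.
  HParallelMove parallel_move(GetGraph()->GetArena());
  parallel_move.AddMove(Location::RegisterLocation(R0),
                        Location::RegisterLocation(R2),
                        Primitive::kPrimInt, nullptr);
  parallel_move.AddMove(Location::RegisterLocation(R1),
                        Location::RegisterLocation(R3),
                        Primitive::kPrimInt, nullptr);
  GetMoveResolver()->EmitNativeCode(&parallel_move);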
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 91cfd00..1d98789 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -28,7 +28,6 @@
 namespace arm {
 
 class CodeGeneratorARM;
-class SlowPathCodeARM;
 
 // Use a local definition to prevent copying mistakes.
 static constexpr size_t kArmWordSize = kArmPointerSize;
@@ -118,20 +117,6 @@
   DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM);
 };
 
-class SlowPathCodeARM : public SlowPathCode {
- public:
-  SlowPathCodeARM() : entry_label_(), exit_label_() {}
-
-  Label* GetEntryLabel() { return &entry_label_; }
-  Label* GetExitLabel() { return &exit_label_; }
-
- private:
-  Label entry_label_;
-  Label exit_label_;
-
-  DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM);
-};
-
 class LocationsBuilderARM : public HGraphVisitor {
  public:
   LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen)
@@ -187,7 +172,7 @@
   // is the block to branch to if the suspend check is not needed, and after
   // the suspend call.
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
-  void GenerateClassInitializationCheck(SlowPathCodeARM* slow_path, Register class_reg);
+  void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
   void HandleBitwiseOperation(HBinaryOperation* operation);
   void HandleShift(HBinaryOperation* operation);
   void GenerateMemoryBarrier(MemBarrierKind kind);
@@ -335,8 +320,10 @@
 
   Label* GetFrameEntryLabel() { return &frame_entry_label_; }
 
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp);
+  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
+  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+
+  void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 576406e..8967108 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -365,8 +365,13 @@
     return false;
   }
 
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp);
+  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
+  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+
+  void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
+                              Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
+    UNIMPLEMENTED(FATAL);
+  }
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 8511eb6..f66ecb3 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -302,7 +302,16 @@
 
   bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const { return false; }
 
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
+  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
+  void GenerateVirtualCall(HInvokeVirtual* invoke ATTRIBUTE_UNUSED,
+                           Location temp ATTRIBUTE_UNUSED) OVERRIDE {
+    UNIMPLEMENTED(FATAL);
+  }
+
+  void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
+                              Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
+    UNIMPLEMENTED(FATAL);
+  }
 
  private:
   // Labels for each block that will be compiled.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 9c5ecc3..09e939d 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -49,7 +49,7 @@
 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())->
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86WordSize, x).Int32Value()
 
-class NullCheckSlowPathX86 : public SlowPathCodeX86 {
+class NullCheckSlowPathX86 : public SlowPathCode {
  public:
   explicit NullCheckSlowPathX86(HNullCheck* instruction) : instruction_(instruction) {}
 
@@ -75,7 +75,7 @@
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
 };
 
-class DivZeroCheckSlowPathX86 : public SlowPathCodeX86 {
+class DivZeroCheckSlowPathX86 : public SlowPathCode {
  public:
   explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : instruction_(instruction) {}
 
@@ -101,7 +101,7 @@
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
 };
 
-class DivRemMinusOneSlowPathX86 : public SlowPathCodeX86 {
+class DivRemMinusOneSlowPathX86 : public SlowPathCode {
  public:
   DivRemMinusOneSlowPathX86(Register reg, bool is_div) : reg_(reg), is_div_(is_div) {}
 
@@ -123,7 +123,7 @@
   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
 };
 
-class BoundsCheckSlowPathX86 : public SlowPathCodeX86 {
+class BoundsCheckSlowPathX86 : public SlowPathCode {
  public:
   explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : instruction_(instruction) {}
 
@@ -161,7 +161,7 @@
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
 };
 
-class SuspendCheckSlowPathX86 : public SlowPathCodeX86 {
+class SuspendCheckSlowPathX86 : public SlowPathCode {
  public:
   SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
       : instruction_(instruction), successor_(successor) {}
@@ -201,7 +201,7 @@
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
 };
 
-class LoadStringSlowPathX86 : public SlowPathCodeX86 {
+class LoadStringSlowPathX86 : public SlowPathCode {
  public:
   explicit LoadStringSlowPathX86(HLoadString* instruction) : instruction_(instruction) {}
 
@@ -233,7 +233,7 @@
   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
 };
 
-class LoadClassSlowPathX86 : public SlowPathCodeX86 {
+class LoadClassSlowPathX86 : public SlowPathCode {
  public:
   LoadClassSlowPathX86(HLoadClass* cls,
                        HInstruction* at,
@@ -285,7 +285,7 @@
   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
 };
 
-class TypeCheckSlowPathX86 : public SlowPathCodeX86 {
+class TypeCheckSlowPathX86 : public SlowPathCode {
  public:
   explicit TypeCheckSlowPathX86(HInstruction* instruction) : instruction_(instruction) {}
 
@@ -340,7 +340,7 @@
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
 };
 
-class DeoptimizationSlowPathX86 : public SlowPathCodeX86 {
+class DeoptimizationSlowPathX86 : public SlowPathCode {
  public:
   explicit DeoptimizationSlowPathX86(HInstruction* instruction)
     : instruction_(instruction) {}
@@ -1197,7 +1197,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCodeX86* slow_path = new (GetGraph()->GetArena())
+  SlowPathCode* slow_path = new (GetGraph()->GetArena())
       DeoptimizationSlowPathX86(deoptimize);
   codegen_->AddSlowPath(slow_path);
   Label* slow_path_entry = slow_path->GetEntryLabel();
@@ -2870,7 +2870,7 @@
           GenerateDivRemWithAnyConstant(instruction);
         }
       } else {
-        SlowPathCodeX86* slow_path =
+        SlowPathCode* slow_path =
           new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(),
               is_div);
         codegen_->AddSlowPath(slow_path);
@@ -3107,7 +3107,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
-  SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86(instruction);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86(instruction);
   codegen_->AddSlowPath(slow_path);
 
   LocationSummary* locations = instruction->GetLocations();
@@ -4049,7 +4049,7 @@
 }
 
 void InstructionCodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
-  SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86(instruction);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86(instruction);
   codegen_->AddSlowPath(slow_path);
 
   LocationSummary* locations = instruction->GetLocations();
@@ -4489,7 +4489,7 @@
   LocationSummary* locations = instruction->GetLocations();
   Location index_loc = locations->InAt(0);
   Location length_loc = locations->InAt(1);
-  SlowPathCodeX86* slow_path =
+  SlowPathCode* slow_path =
     new (GetGraph()->GetArena()) BoundsCheckSlowPathX86(instruction);
 
   if (length_loc.IsConstant()) {
@@ -4856,7 +4856,7 @@
     __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
     // TODO: We will need a read barrier here.
 
-    SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
     codegen_->AddSlowPath(slow_path);
     __ testl(out, out);
@@ -4880,7 +4880,7 @@
 
 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
   // We assume the class to not be null.
-  SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
       check->GetLoadClass(), check, check->GetDexPc(), true);
   codegen_->AddSlowPath(slow_path);
   GenerateClassInitializationCheck(slow_path,
@@ -4888,7 +4888,7 @@
 }
 
 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
-    SlowPathCodeX86* slow_path, Register class_reg) {
+    SlowPathCode* slow_path, Register class_reg) {
   __ cmpl(Address(class_reg,  mirror::Class::StatusOffset().Int32Value()),
           Immediate(mirror::Class::kStatusInitialized));
   __ j(kLess, slow_path->GetEntryLabel());
@@ -4904,7 +4904,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
-  SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
   codegen_->AddSlowPath(slow_path);
 
   LocationSummary* locations = load->GetLocations();
@@ -4973,7 +4973,7 @@
   Register out = locations->Out().AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   NearLabel done, zero;
-  SlowPathCodeX86* slow_path = nullptr;
+  SlowPathCode* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
   // Avoid null check if we know obj is not null.
@@ -5032,8 +5032,7 @@
   Location cls = locations->InAt(1);
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  SlowPathCodeX86* slow_path =
-      new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction);
   codegen_->AddSlowPath(slow_path);
 
   // Avoid null check if we know obj is not null.
@@ -5467,6 +5466,35 @@
   visitor.VisitInsertionOrder();
 }
 
+// TODO: target as memory.
+void CodeGeneratorX86::MoveFromReturnRegister(Location target, Primitive::Type type) {
+  if (!target.IsValid()) {
+    DCHECK(type == Primitive::kPrimVoid);
+    return;
+  }
+
+  DCHECK_NE(type, Primitive::kPrimVoid);
+
+  Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
+  if (target.Equals(return_loc)) {
+    return;
+  }
+
+  // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
+  //       with the else branch.
+  if (type == Primitive::kPrimLong) {
+    HParallelMove parallel_move(GetGraph()->GetArena());
+    parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), Primitive::kPrimInt, nullptr);
+    parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), Primitive::kPrimInt, nullptr);
+    GetMoveResolver()->EmitNativeCode(&parallel_move);
+  } else {
+    // Let the parallel move resolver take care of all of this.
+    HParallelMove parallel_move(GetGraph()->GetArena());
+    parallel_move.AddMove(return_loc, target, type, nullptr);
+    GetMoveResolver()->EmitNativeCode(&parallel_move);
+  }
+}
+
 #undef __
 
 }  // namespace x86
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f3307cf..f38e1ea 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -31,7 +31,6 @@
 static constexpr size_t kX86WordSize = kX86PointerSize;
 
 class CodeGeneratorX86;
-class SlowPathCodeX86;
 
 static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX };
 static constexpr RegisterPair kParameterCorePairRegisters[] = { ECX_EDX, EDX_EBX };
@@ -171,7 +170,7 @@
   // is the block to branch to if the suspend check is not needed, and after
   // the suspend call.
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
-  void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg);
+  void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
@@ -300,9 +299,11 @@
   void Move64(Location destination, Location source);
 
   // Generate a call to a static or direct method.
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
+  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   // Generate a call to a virtual method.
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp);
+  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+
+  void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
   // Emit linker patches.
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
@@ -384,20 +385,6 @@
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
 };
 
-class SlowPathCodeX86 : public SlowPathCode {
- public:
-  SlowPathCodeX86() : entry_label_(), exit_label_() {}
-
-  Label* GetEntryLabel() { return &entry_label_; }
-  Label* GetExitLabel() { return &exit_label_; }
-
- private:
-  Label entry_label_;
-  Label exit_label_;
-
-  DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86);
-};
-
 }  // namespace x86
 }  // namespace art
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 134bfed..289ef64 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -50,7 +50,7 @@
 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x).Int32Value()
 
-class NullCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
+class NullCheckSlowPathX86_64 : public SlowPathCode {
  public:
   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {}
 
@@ -76,7 +76,7 @@
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
 };
 
-class DivZeroCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
+class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
  public:
   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : instruction_(instruction) {}
 
@@ -102,7 +102,7 @@
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
 };
 
-class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 {
+class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
  public:
   DivRemMinusOneSlowPathX86_64(Register reg, Primitive::Type type, bool is_div)
       : cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
@@ -136,7 +136,7 @@
   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
 };
 
-class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
+class SuspendCheckSlowPathX86_64 : public SlowPathCode {
  public:
   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
       : instruction_(instruction), successor_(successor) {}
@@ -176,7 +176,7 @@
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
 };
 
-class BoundsCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
+class BoundsCheckSlowPathX86_64 : public SlowPathCode {
  public:
   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
     : instruction_(instruction) {}
@@ -213,7 +213,7 @@
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
 };
 
-class LoadClassSlowPathX86_64 : public SlowPathCodeX86_64 {
+class LoadClassSlowPathX86_64 : public SlowPathCode {
  public:
   LoadClassSlowPathX86_64(HLoadClass* cls,
                           HInstruction* at,
@@ -266,7 +266,7 @@
   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
 };
 
-class LoadStringSlowPathX86_64 : public SlowPathCodeX86_64 {
+class LoadStringSlowPathX86_64 : public SlowPathCode {
  public:
   explicit LoadStringSlowPathX86_64(HLoadString* instruction) : instruction_(instruction) {}
 
@@ -298,7 +298,7 @@
   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
 };
 
-class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
+class TypeCheckSlowPathX86_64 : public SlowPathCode {
  public:
   explicit TypeCheckSlowPathX86_64(HInstruction* instruction)
       : instruction_(instruction) {}
@@ -355,7 +355,7 @@
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
 };
 
-class DeoptimizationSlowPathX86_64 : public SlowPathCodeX86_64 {
+class DeoptimizationSlowPathX86_64 : public SlowPathCode {
  public:
   explicit DeoptimizationSlowPathX86_64(HInstruction* instruction)
       : instruction_(instruction) {}
@@ -1181,7 +1181,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena())
+  SlowPathCode* slow_path = new (GetGraph()->GetArena())
       DeoptimizationSlowPathX86_64(deoptimize);
   codegen_->AddSlowPath(slow_path);
   Label* slow_path_entry = slow_path->GetEntryLabel();
@@ -3072,7 +3072,7 @@
       GenerateDivRemWithAnyConstant(instruction);
     }
   } else {
-    SlowPathCodeX86_64* slow_path =
+    SlowPathCode* slow_path =
         new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
             out.AsRegister(), type, is_div);
     codegen_->AddSlowPath(slow_path);
@@ -3245,7 +3245,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
-  SlowPathCodeX86_64* slow_path =
+  SlowPathCode* slow_path =
       new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86_64(instruction);
   codegen_->AddSlowPath(slow_path);
 
@@ -3813,7 +3813,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
-  SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
   codegen_->AddSlowPath(slow_path);
 
   LocationSummary* locations = instruction->GetLocations();
@@ -4232,7 +4232,7 @@
   LocationSummary* locations = instruction->GetLocations();
   Location index_loc = locations->InAt(0);
   Location length_loc = locations->InAt(1);
-  SlowPathCodeX86_64* slow_path =
+  SlowPathCode* slow_path =
     new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
 
   if (length_loc.IsConstant()) {
@@ -4560,7 +4560,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
-    SlowPathCodeX86_64* slow_path, CpuRegister class_reg) {
+    SlowPathCode* slow_path, CpuRegister class_reg) {
   __ cmpl(Address(class_reg,  mirror::Class::StatusOffset().Int32Value()),
           Immediate(mirror::Class::kStatusInitialized));
   __ j(kLess, slow_path->GetEntryLabel());
@@ -4593,7 +4593,7 @@
     __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
     // TODO: We will need a read barrier here.
 
-    SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
     codegen_->AddSlowPath(slow_path);
     __ testl(out, out);
@@ -4617,7 +4617,7 @@
 
 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
   // We assume the class to not be null.
-  SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
       check->GetLoadClass(), check, check->GetDexPc(), true);
   codegen_->AddSlowPath(slow_path);
   GenerateClassInitializationCheck(slow_path,
@@ -4632,7 +4632,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
-  SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
   codegen_->AddSlowPath(slow_path);
 
   LocationSummary* locations = load->GetLocations();
@@ -4701,7 +4701,7 @@
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   NearLabel done, zero;
-  SlowPathCodeX86_64* slow_path = nullptr;
+  SlowPathCode* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
   // Avoid null check if we know obj is not null.
@@ -4759,8 +4759,7 @@
   Location cls = locations->InAt(1);
   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  SlowPathCodeX86_64* slow_path =
-      new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction);
   codegen_->AddSlowPath(slow_path);
 
   // Avoid null check if we know obj is not null.
@@ -5021,6 +5020,26 @@
   return Address::RIP(fixup);
 }
 
+// TODO: trg as memory.
+void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
+  if (!trg.IsValid()) {
+    DCHECK(type == Primitive::kPrimVoid);
+    return;
+  }
+
+  DCHECK_NE(type, Primitive::kPrimVoid);
+
+  Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
+  if (trg.Equals(return_loc)) {
+    return;
+  }
+
+  // Let the parallel move resolver take care of all of this.
+  HParallelMove parallel_move(GetGraph()->GetArena());
+  parallel_move.AddMove(return_loc, trg, type, nullptr);
+  GetMoveResolver()->EmitNativeCode(&parallel_move);
+}
+
 #undef __
 
 }  // namespace x86_64
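
Note: unlike the 32-bit backends, MoveFromReturnRegister needs no
special case for kPrimLong here, since x86-64 returns the full 64-bit
value in RAX and the single generic AddMove covers it.
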
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 9b2423f..1ec3580 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -87,20 +87,6 @@
 
 class CodeGeneratorX86_64;
 
-class SlowPathCodeX86_64 : public SlowPathCode {
- public:
-  SlowPathCodeX86_64() : entry_label_(), exit_label_() {}
-
-  Label* GetEntryLabel() { return &entry_label_; }
-  Label* GetExitLabel() { return &exit_label_; }
-
- private:
-  Label entry_label_;
-  Label exit_label_;
-
-  DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64);
-};
-
 class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap {
  public:
   ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen)
@@ -181,7 +167,7 @@
   // is the block to branch to if the suspend check is not needed, and after
   // the suspend call.
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
-  void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg);
+  void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg);
   void HandleBitwiseOperation(HBinaryOperation* operation);
   void GenerateRemFP(HRem* rem);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
@@ -311,8 +297,10 @@
     return false;
   }
 
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp);
+  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
+  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+
+  void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index cc8ddb6..938c78e 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -21,6 +21,7 @@
 #include "code_generator_arm.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "intrinsics.h"
+#include "intrinsics_utils.h"
 #include "mirror/array-inl.h"
 #include "mirror/string.h"
 #include "thread.h"
@@ -38,99 +39,7 @@
   return codegen_->GetGraph()->GetArena();
 }
 
-#define __ codegen->GetAssembler()->
-
-static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGeneratorARM* codegen) {
-  if (!trg.IsValid()) {
-    DCHECK(type == Primitive::kPrimVoid);
-    return;
-  }
-
-  DCHECK_NE(type, Primitive::kPrimVoid);
-
-  if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) {
-    if (type == Primitive::kPrimLong) {
-      Register trg_reg_lo = trg.AsRegisterPairLow<Register>();
-      Register trg_reg_hi = trg.AsRegisterPairHigh<Register>();
-      Register res_reg_lo = R0;
-      Register res_reg_hi = R1;
-      if (trg_reg_lo != res_reg_hi) {
-        if (trg_reg_lo != res_reg_lo) {
-          __ mov(trg_reg_lo, ShifterOperand(res_reg_lo));
-          __ mov(trg_reg_hi, ShifterOperand(res_reg_hi));
-        } else {
-          DCHECK_EQ(trg_reg_lo + 1, trg_reg_hi);
-        }
-      } else {
-        __ mov(trg_reg_hi, ShifterOperand(res_reg_hi));
-        __ mov(trg_reg_lo, ShifterOperand(res_reg_lo));
-      }
-    } else {
-      Register trg_reg = trg.AsRegister<Register>();
-      Register res_reg = R0;
-      if (trg_reg != res_reg) {
-        __ mov(trg_reg, ShifterOperand(res_reg));
-      }
-    }
-  } else {
-    UNIMPLEMENTED(FATAL) << "Floating-point return.";
-  }
-}
-
-static void MoveArguments(HInvoke* invoke, CodeGeneratorARM* codegen) {
-  InvokeDexCallingConventionVisitorARM calling_convention_visitor;
-  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
-}
-
-// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
-// call. This will copy the arguments into the positions for a regular call.
-//
-// Note: The actual parameters are required to be in the locations given by the invoke's location
-//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
-//       restored!
-class IntrinsicSlowPathARM : public SlowPathCodeARM {
- public:
-  explicit IntrinsicSlowPathARM(HInvoke* invoke) : invoke_(invoke) { }
-
-  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
-    CodeGeneratorARM* codegen = down_cast<CodeGeneratorARM*>(codegen_in);
-    __ Bind(GetEntryLabel());
-
-    SaveLiveRegisters(codegen, invoke_->GetLocations());
-
-    MoveArguments(invoke_, codegen);
-
-    if (invoke_->IsInvokeStaticOrDirect()) {
-      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
-                                          Location::RegisterLocation(kArtMethodRegister));
-    } else {
-      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(),
-                                   Location::RegisterLocation(kArtMethodRegister));
-    }
-    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
-
-    // Copy the result back to the expected output.
-    Location out = invoke_->GetLocations()->Out();
-    if (out.IsValid()) {
-      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
-      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
-      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
-    }
-
-    RestoreLiveRegisters(codegen, invoke_->GetLocations());
-    __ b(GetExitLabel());
-  }
-
-  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM"; }
-
- private:
-  // The instruction where this slow path is happening.
-  HInvoke* const invoke_;
-
-  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM);
-};
-
-#undef __
+using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>;
 
 bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
@@ -1094,7 +1003,7 @@
   // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
   //       we will not optimize the code for constants (which would save a register).
 
-  SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
   codegen_->AddSlowPath(slow_path);
 
   __ ldr(temp, Address(obj, count_offset.Int32Value()));          // temp = str.length.
@@ -1130,7 +1039,7 @@
 
   Register argument = locations->InAt(1).AsRegister<Register>();
   __ cmp(argument, ShifterOperand(0));
-  SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
   codegen_->AddSlowPath(slow_path);
   __ b(slow_path->GetEntryLabel(), EQ);
 
@@ -1248,7 +1157,7 @@
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
   // or directly dispatch if we have a constant.
-  SlowPathCodeARM* slow_path = nullptr;
+  SlowPathCode* slow_path = nullptr;
   if (invoke->InputAt(1)->IsIntConstant()) {
     if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
         std::numeric_limits<uint16_t>::max()) {
@@ -1341,7 +1250,7 @@
 
   Register byte_array = locations->InAt(0).AsRegister<Register>();
   __ cmp(byte_array, ShifterOperand(0));
-  SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
   codegen_->AddSlowPath(slow_path);
   __ b(slow_path->GetEntryLabel(), EQ);
 
@@ -1387,7 +1296,7 @@
 
   Register string_to_copy = locations->InAt(0).AsRegister<Register>();
   __ cmp(string_to_copy, ShifterOperand(0));
-  SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
   codegen_->AddSlowPath(slow_path);
   __ b(slow_path->GetEntryLabel(), EQ);
 
diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h
new file mode 100644
index 0000000..e70afd2
--- /dev/null
+++ b/compiler/optimizing/intrinsics_utils.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_UTILS_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_UTILS_H_
+
+#include "base/macros.h"
+#include "code_generator.h"
+#include "locations.h"
+#include "nodes.h"
+#include "utils/assembler.h"
+#include "utils/label.h"
+
+namespace art {
+
+// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
+// intrinsified call. This will copy the arguments into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations given by the invoke's location
+//       summary. If an intrinsic modifies those locations before a slow-path call, they must be
+//       restored!
+//
+// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
+//       sub-optimal (compared to a direct pointer call), but this is a slow-path.
+
+template <typename TDexCallingConvention>
+class IntrinsicSlowPath : public SlowPathCode {
+ public:
+  explicit IntrinsicSlowPath(HInvoke* invoke) : invoke_(invoke) { }
+
+  Location MoveArguments(CodeGenerator* codegen) {
+    TDexCallingConvention calling_convention_visitor;
+    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
+    return calling_convention_visitor.GetMethodLocation();
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    Assembler* assembler = codegen->GetAssembler();
+    assembler->Bind(GetEntryLabel());
+
+    SaveLiveRegisters(codegen, invoke_->GetLocations());
+
+    Location method_loc = MoveArguments(codegen);
+
+    if (invoke_->IsInvokeStaticOrDirect()) {
+      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc);
+    } else {
+      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc);
+    }
+    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
+
+    // Copy the result back to the expected output.
+    Location out = invoke_->GetLocations()->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
+      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+      codegen->MoveFromReturnRegister(out, invoke_->GetType());
+    }
+
+    RestoreLiveRegisters(codegen, invoke_->GetLocations());
+    assembler->Jump(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }
+
+ private:
+  // The instruction where this slow path is happening.
+  HInvoke* const invoke_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPath);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_UTILS_H_
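
With the template in place, instantiating the slow path looks the same
in every backend's intrinsics file; e.g. this null check from the ARM
String intrinsics above (verbatim from that diff):

  Register argument = locations->InAt(1).AsRegister<Register>();
  __ cmp(argument, ShifterOperand(0));
  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
  codegen_->AddSlowPath(slow_path);
  __ b(slow_path->GetEntryLabel(), EQ);
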
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index d0bc55c..5becf0f 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -24,6 +24,7 @@
 #include "code_generator_x86.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "intrinsics.h"
+#include "intrinsics_utils.h"
 #include "mirror/array-inl.h"
 #include "mirror/string.h"
 #include "thread.h"
@@ -57,117 +58,13 @@
   return res != nullptr && res->Intrinsified();
 }
 
-#define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())->
-
-// TODO: target as memory.
-static void MoveFromReturnRegister(Location target,
-                                   Primitive::Type type,
-                                   CodeGeneratorX86* codegen) {
-  if (!target.IsValid()) {
-    DCHECK(type == Primitive::kPrimVoid);
-    return;
-  }
-
-  switch (type) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      Register target_reg = target.AsRegister<Register>();
-      if (target_reg != EAX) {
-        __ movl(target_reg, EAX);
-      }
-      break;
-    }
-    case Primitive::kPrimLong: {
-      Register target_reg_lo = target.AsRegisterPairLow<Register>();
-      Register target_reg_hi = target.AsRegisterPairHigh<Register>();
-      if (target_reg_lo != EAX) {
-        __ movl(target_reg_lo, EAX);
-      }
-      if (target_reg_hi != EDX) {
-        __ movl(target_reg_hi, EDX);
-      }
-      break;
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected void type for valid location " << target;
-      UNREACHABLE();
-
-    case Primitive::kPrimDouble: {
-      XmmRegister target_reg = target.AsFpuRegister<XmmRegister>();
-      if (target_reg != XMM0) {
-        __ movsd(target_reg, XMM0);
-      }
-      break;
-    }
-    case Primitive::kPrimFloat: {
-      XmmRegister target_reg = target.AsFpuRegister<XmmRegister>();
-      if (target_reg != XMM0) {
-        __ movss(target_reg, XMM0);
-      }
-      break;
-    }
-  }
-}
-
 static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
   InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
   IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
-// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
-// call. This will copy the arguments into the positions for a regular call.
-//
-// Note: The actual parameters are required to be in the locations given by the invoke's location
-//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
-//       restored!
-class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
- public:
-  explicit IntrinsicSlowPathX86(HInvoke* invoke)
-    : invoke_(invoke) { }
+using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
 
-  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
-    CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_in);
-    __ Bind(GetEntryLabel());
-
-    SaveLiveRegisters(codegen, invoke_->GetLocations());
-
-    MoveArguments(invoke_, codegen);
-
-    if (invoke_->IsInvokeStaticOrDirect()) {
-      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
-                                          Location::RegisterLocation(EAX));
-    } else {
-      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(EAX));
-    }
-    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
-
-    // Copy the result back to the expected output.
-    Location out = invoke_->GetLocations()->Out();
-    if (out.IsValid()) {
-      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
-      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
-      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
-    }
-
-    RestoreLiveRegisters(codegen, invoke_->GetLocations());
-    __ jmp(GetExitLabel());
-  }
-
-  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathX86"; }
-
- private:
-  // The instruction where this slow path is happening.
-  HInvoke* const invoke_;
-
-  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86);
-};
-
-#undef __
 #define __ assembler->
 
 static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
@@ -743,7 +640,7 @@
   Location out = invoke->GetLocations()->Out();
   if (out.IsValid()) {
     DCHECK(out.IsRegister());
-    MoveFromReturnRegister(out, invoke->GetType(), codegen);
+    codegen->MoveFromReturnRegister(out, invoke->GetType());
   }
 }
 
@@ -902,7 +799,7 @@
   // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
   //       we will not optimize the code for constants (which would save a register).
 
-  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
   codegen_->AddSlowPath(slow_path);
 
   X86Assembler* assembler = GetAssembler();
@@ -971,7 +868,7 @@
                           Location pos,
                           Register input,
                           Register length,
-                          SlowPathCodeX86* slow_path,
+                          SlowPathCode* slow_path,
                           Register input_len,
                           Register temp) {
   // Where is the length in the String?
@@ -1030,7 +927,7 @@
   Register count = locations->GetTemp(2).AsRegister<Register>();
   DCHECK_EQ(count, ECX);
 
-  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
   codegen_->AddSlowPath(slow_path);
 
   // Bail out if the source and destination are the same (to handle overlap).
@@ -1114,7 +1011,7 @@
 
   Register argument = locations->InAt(1).AsRegister<Register>();
   __ testl(argument, argument);
-  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
@@ -1259,7 +1156,7 @@
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
   // or directly dispatch if we have a constant.
-  SlowPathCodeX86* slow_path = nullptr;
+  SlowPathCode* slow_path = nullptr;
   if (invoke->InputAt(1)->IsIntConstant()) {
     if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
     std::numeric_limits<uint16_t>::max()) {
@@ -1380,7 +1277,7 @@
 
   Register byte_array = locations->InAt(0).AsRegister<Register>();
   __ testl(byte_array, byte_array);
-  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
@@ -1422,7 +1319,7 @@
 
   Register string_to_copy = locations->InAt(0).AsRegister<Register>();
   __ testl(string_to_copy, string_to_copy);
-  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 315cbab..2bd86a1 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -24,6 +24,7 @@
 #include "code_generator_x86_64.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "intrinsics.h"
+#include "intrinsics_utils.h"
 #include "mirror/array-inl.h"
 #include "mirror/string.h"
 #include "thread.h"
@@ -53,112 +54,13 @@
   return res != nullptr && res->Intrinsified();
 }
 
-#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
-
-// TODO: trg as memory.
-static void MoveFromReturnRegister(Location trg,
-                                   Primitive::Type type,
-                                   CodeGeneratorX86_64* codegen) {
-  if (!trg.IsValid()) {
-    DCHECK(type == Primitive::kPrimVoid);
-    return;
-  }
-
-  switch (type) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
-      if (trg_reg.AsRegister() != RAX) {
-        __ movl(trg_reg, CpuRegister(RAX));
-      }
-      break;
-    }
-    case Primitive::kPrimLong: {
-      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
-      if (trg_reg.AsRegister() != RAX) {
-        __ movq(trg_reg, CpuRegister(RAX));
-      }
-      break;
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected void type for valid location " << trg;
-      UNREACHABLE();
-
-    case Primitive::kPrimDouble: {
-      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
-      if (trg_reg.AsFloatRegister() != XMM0) {
-        __ movsd(trg_reg, XmmRegister(XMM0));
-      }
-      break;
-    }
-    case Primitive::kPrimFloat: {
-      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
-      if (trg_reg.AsFloatRegister() != XMM0) {
-        __ movss(trg_reg, XmmRegister(XMM0));
-      }
-      break;
-    }
-  }
-}
-
 static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
   IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
-// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
-// call. This will copy the arguments into the positions for a regular call.
-//
-// Note: The actual parameters are required to be in the locations given by the invoke's location
-//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
-//       restored!
-class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
- public:
-  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }
+using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;
 
-  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
-    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
-    __ Bind(GetEntryLabel());
-
-    SaveLiveRegisters(codegen, invoke_->GetLocations());
-
-    MoveArguments(invoke_, codegen);
-
-    if (invoke_->IsInvokeStaticOrDirect()) {
-      codegen->GenerateStaticOrDirectCall(
-          invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
-    } else {
-      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(RDI));
-    }
-    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
-
-    // Copy the result back to the expected output.
-    Location out = invoke_->GetLocations()->Out();
-    if (out.IsValid()) {
-      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
-      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
-      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
-    }
-
-    RestoreLiveRegisters(codegen, invoke_->GetLocations());
-    __ jmp(GetExitLabel());
-  }
-
-  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathX86_64"; }
-
- private:
-  // The instruction where this slow path is happening.
-  HInvoke* const invoke_;
-
-  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
-};
-
-#undef __
 #define __ assembler->
 
 static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -602,7 +504,7 @@
   Location out = invoke->GetLocations()->Out();
   if (out.IsValid()) {
     DCHECK(out.IsRegister());
-    MoveFromReturnRegister(out, invoke->GetType(), codegen);
+    codegen->MoveFromReturnRegister(out, invoke->GetType());
   }
 }
 
@@ -811,7 +713,7 @@
   // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
   //       we will not optimize the code for constants (which would save a register).
 
-  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
   codegen_->AddSlowPath(slow_path);
 
   X86_64Assembler* assembler = GetAssembler();
@@ -869,7 +771,7 @@
                           Location pos,
                           CpuRegister input,
                           CpuRegister length,
-                          SlowPathCodeX86_64* slow_path,
+                          SlowPathCode* slow_path,
                           CpuRegister input_len,
                           CpuRegister temp) {
   // Where is the length in the String?
@@ -928,7 +830,7 @@
   CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
   DCHECK_EQ(count.AsRegister(), RCX);
 
-  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
   codegen_->AddSlowPath(slow_path);
 
   // Bail out if the source and destination are the same.
@@ -1010,7 +912,7 @@
 
   CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
   __ testl(argument, argument);
-  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
@@ -1156,7 +1058,7 @@
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
   // or directly dispatch if we have a constant.
-  SlowPathCodeX86_64* slow_path = nullptr;
+  SlowPathCode* slow_path = nullptr;
   if (invoke->InputAt(1)->IsIntConstant()) {
     if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
     std::numeric_limits<uint16_t>::max()) {
@@ -1276,7 +1178,7 @@
 
   CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
   __ testl(byte_array, byte_array);
-  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
@@ -1320,7 +1222,7 @@
 
   CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
   __ testl(string_to_copy, string_to_copy);
-  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());