Intrinsify System.arraycopy.

Currently implemented on x86-64 only; the other architectures will be
done in separate changes.
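
The fast path performs the null, overlap, range, and type checks
inline and then copies the references with a simple word-by-word
loop, falling back to a slow-path runtime call whenever a check
fails. For example, with src and dest known to be object arrays, a
call such as

  System.arraycopy(src, srcPos, dest, destPos, length);

can be served entirely by the generated fast path.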

Change-Id: I15fbbadb450dd21787809759a8b14b21b1e42624
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index af93aab..e1a2838 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -73,6 +73,7 @@
     false,  // kIntrinsicUnsafeGet
     false,  // kIntrinsicUnsafePut
     true,   // kIntrinsicSystemArrayCopyCharArray
+    true,   // kIntrinsicSystemArrayCopy
 };
 static_assert(arraysize(kIntrinsicIsStatic) == kInlineOpNop,
               "arraysize of kIntrinsicIsStatic unexpected");
@@ -121,6 +122,8 @@
 static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafePut], "UnsafePut must not be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopyCharArray],
               "SystemArrayCopyCharArray must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopy],
+              "SystemArrayCopy must be static");
 
 MIR* AllocReplacementMIR(MIRGraph* mir_graph, MIR* invoke) {
   MIR* insn = mir_graph->NewMIR();
@@ -326,6 +329,9 @@
     // kProtoCacheCharArrayICharArrayII_V
     { kClassCacheVoid, 5, {kClassCacheJavaLangCharArray, kClassCacheInt,
         kClassCacheJavaLangCharArray, kClassCacheInt, kClassCacheInt} },
+    // kProtoCacheObjectIObjectII_V
+    { kClassCacheVoid, 5, {kClassCacheJavaLangObject, kClassCacheInt,
+        kClassCacheJavaLangObject, kClassCacheInt, kClassCacheInt} },
     // kProtoCacheIICharArrayI_V
     { kClassCacheVoid, 4, { kClassCacheInt, kClassCacheInt, kClassCacheJavaLangCharArray,
         kClassCacheInt } },
@@ -481,6 +487,8 @@
 
     INTRINSIC(JavaLangSystem, ArrayCopy, CharArrayICharArrayII_V , kIntrinsicSystemArrayCopyCharArray,
               0),
+    INTRINSIC(JavaLangSystem, ArrayCopy, ObjectIObjectII_V , kIntrinsicSystemArrayCopy,
+              0),
 
     INTRINSIC(JavaLangInteger, RotateRight, II_I, kIntrinsicRotateRight, k32),
     INTRINSIC(JavaLangLong, RotateRight, JI_J, kIntrinsicRotateRight, k64),
@@ -653,6 +661,7 @@
     case kIntrinsicNumberOfTrailingZeros:
     case kIntrinsicRotateRight:
     case kIntrinsicRotateLeft:
+    case kIntrinsicSystemArrayCopy:
       return false;   // not implemented in quick.
     default:
       LOG(FATAL) << "Unexpected intrinsic opcode: " << intrinsic.opcode;
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index 8458806..5ce110c 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -259,6 +259,7 @@
       kProtoCacheObjectJ_Object,
       kProtoCacheObjectJObject_V,
       kProtoCacheCharArrayICharArrayII_V,
+      kProtoCacheObjectIObjectII_V,
       kProtoCacheIICharArrayI_V,
       kProtoCacheByteArrayIII_String,
       kProtoCacheIICharArray_String,
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 272d86f..9a1cc53 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -36,9 +36,6 @@
 
 namespace x86_64 {
 
-// Some x86_64 instructions require a register to be available as temp.
-static constexpr Register TMP = R11;
-
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = RDI;
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index ecc8630..d6a6a7e 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -30,6 +30,9 @@
 // Use a local definition to prevent copying mistakes.
 static constexpr size_t kX86_64WordSize = kX86_64PointerSize;
 
+// Some x86_64 instructions require a register to be available as temp.
+static constexpr Register TMP = R11;
+
 static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
 static constexpr FloatRegister kParameterFloatRegisters[] =
     { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 8a0c8c2..7e27aa6 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -76,6 +76,9 @@
 
   bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
 
+  void SimplifySystemArrayCopy(HInvoke* invoke);
+  void SimplifyStringEquals(HInvoke* invoke);
+
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
   int simplifications_at_current_position_ = 0;
@@ -1039,28 +1042,100 @@
   instruction->GetBlock()->RemoveInstruction(instruction);
 }
 
-void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
-  if (instruction->GetIntrinsic() == Intrinsics::kStringEquals) {
-    HInstruction* argument = instruction->InputAt(1);
-    HInstruction* receiver = instruction->InputAt(0);
-    if (receiver == argument) {
-      // Because String.equals is an instance call, the receiver is
-      // a null check if we don't know it's null. The argument however, will
-      // be the actual object. So we cannot end up in a situation where both
-      // are equal but could be null.
-      DCHECK(CanEnsureNotNullAt(argument, instruction));
-      instruction->ReplaceWith(GetGraph()->GetIntConstant(1));
-      instruction->GetBlock()->RemoveInstruction(instruction);
-    } else {
-      StringEqualsOptimizations optimizations(instruction);
-      if (CanEnsureNotNullAt(argument, instruction)) {
-        optimizations.SetArgumentNotNull();
+void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) {
+  HInstruction* argument = instruction->InputAt(1);
+  HInstruction* receiver = instruction->InputAt(0);
+  if (receiver == argument) {
+    // Because String.equals is an instance call, the receiver is
+    // wrapped in a null check if we don't know it's non-null. The
+    // argument, however, will be the actual object. So we cannot end up
+    // in a situation where both are equal but could be null.
+    DCHECK(CanEnsureNotNullAt(argument, instruction));
+    instruction->ReplaceWith(GetGraph()->GetIntConstant(1));
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  } else {
+    StringEqualsOptimizations optimizations(instruction);
+    if (CanEnsureNotNullAt(argument, instruction)) {
+      optimizations.SetArgumentNotNull();
+    }
+    ScopedObjectAccess soa(Thread::Current());
+    ReferenceTypeInfo argument_rti = argument->GetReferenceTypeInfo();
+    if (argument_rti.IsValid() && argument_rti.IsStringClass()) {
+      optimizations.SetArgumentIsString();
+    }
+  }
+}
+
+static bool IsArrayLengthOf(HInstruction* potential_length, HInstruction* potential_array) {
+  if (potential_length->IsArrayLength()) {
+    return potential_length->InputAt(0) == potential_array;
+  }
+
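+  // HNewArray takes its length as its first input, so an array created as
+  // 'new T[potential_length]' is known to have exactly that length.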
+  if (potential_array->IsNewArray()) {
+    return potential_array->InputAt(0) == potential_length;
+  }
+
+  return false;
+}
+
+void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) {
+  HInstruction* source = instruction->InputAt(0);
+  HInstruction* destination = instruction->InputAt(2);
+  HInstruction* count = instruction->InputAt(4);
+  SystemArrayCopyOptimizations optimizations(instruction);
+  if (CanEnsureNotNullAt(source, instruction)) {
+    optimizations.SetSourceIsNotNull();
+  }
+  if (CanEnsureNotNullAt(destination, instruction)) {
+    optimizations.SetDestinationIsNotNull();
+  }
+  if (destination == source) {
+    optimizations.SetDestinationIsSource();
+  }
+
+  if (IsArrayLengthOf(count, source)) {
+    optimizations.SetCountIsSourceLength();
+  }
+
+  if (IsArrayLengthOf(count, destination)) {
+    optimizations.SetCountIsDestinationLength();
+  }
+
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    ReferenceTypeInfo destination_rti = destination->GetReferenceTypeInfo();
+    if (destination_rti.IsValid()) {
+      if (destination_rti.IsObjectArray()) {
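+        // An array statically known to be exactly Object[] can hold any
+        // reference, so no type check is needed.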
+        if (destination_rti.IsExact()) {
+          optimizations.SetDoesNotNeedTypeCheck();
+        }
+        optimizations.SetDestinationIsTypedObjectArray();
       }
-      ScopedObjectAccess soa(Thread::Current());
-      if (argument->GetReferenceTypeInfo().IsStringClass()) {
-        optimizations.SetArgumentIsString();
+      if (destination_rti.IsPrimitiveArrayClass()) {
+        optimizations.SetDestinationIsPrimitiveArray();
+      } else if (destination_rti.IsNonPrimitiveArrayClass()) {
+        optimizations.SetDestinationIsNonPrimitiveArray();
       }
     }
+    ReferenceTypeInfo source_rti = source->GetReferenceTypeInfo();
+    if (source_rti.IsValid()) {
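+      // If the destination's component type can hold every value the source
+      // array may contain, no per-element type check is needed.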
+      if (destination_rti.IsValid() && destination_rti.CanArrayHoldValuesOf(source_rti)) {
+        optimizations.SetDoesNotNeedTypeCheck();
+      }
+      if (source_rti.IsPrimitiveArrayClass()) {
+        optimizations.SetSourceIsPrimitiveArray();
+      } else if (source_rti.IsNonPrimitiveArrayClass()) {
+        optimizations.SetSourceIsNonPrimitiveArray();
+      }
+    }
+  }
+}
+
+void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
+  if (instruction->GetIntrinsic() == Intrinsics::kStringEquals) {
+    SimplifyStringEquals(instruction);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kSystemArrayCopy) {
+    SimplifySystemArrayCopy(instruction);
   }
 }
 
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 9564622..dbe7524 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -210,6 +210,9 @@
     case kIntrinsicSystemArrayCopyCharArray:
       return Intrinsics::kSystemArrayCopyChar;
 
+    case kIntrinsicSystemArrayCopy:
+      return Intrinsics::kSystemArrayCopy;
+
     // Thread.currentThread.
     case kIntrinsicCurrentThread:
       return  Intrinsics::kThreadCurrentThread;
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index d50fe79..e459516 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -168,6 +168,26 @@
   DISALLOW_COPY_AND_ASSIGN(StringEqualsOptimizations);
 };
 
+class SystemArrayCopyOptimizations : public IntrinsicOptimizations {
+ public:
+  explicit SystemArrayCopyOptimizations(HInvoke* invoke) : IntrinsicOptimizations(invoke) {}
+
+  INTRINSIC_OPTIMIZATION(SourceIsNotNull, 0);
+  INTRINSIC_OPTIMIZATION(DestinationIsNotNull, 1);
+  INTRINSIC_OPTIMIZATION(DestinationIsSource, 2);
+  INTRINSIC_OPTIMIZATION(CountIsSourceLength, 3);
+  INTRINSIC_OPTIMIZATION(CountIsDestinationLength, 4);
+  INTRINSIC_OPTIMIZATION(DoesNotNeedTypeCheck, 5);
+  INTRINSIC_OPTIMIZATION(DestinationIsTypedObjectArray, 6);
+  INTRINSIC_OPTIMIZATION(DestinationIsNonPrimitiveArray, 7);
+  INTRINSIC_OPTIMIZATION(DestinationIsPrimitiveArray, 8);
+  INTRINSIC_OPTIMIZATION(SourceIsNonPrimitiveArray, 9);
+  INTRINSIC_OPTIMIZATION(SourceIsPrimitiveArray, 10);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(SystemArrayCopyOptimizations);
+};
+
 #undef INTRISIC_OPTIMIZATION
 
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 938c78e..2793793 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1333,6 +1333,7 @@
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)    // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index b0cfd0d..4da94ee 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1447,6 +1447,7 @@
 }
 
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index bfe5e55..8f1d5e1 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -58,6 +58,7 @@
   V(MathRoundDouble, kStatic, kNeedsEnvironmentOrCache) \
   V(MathRoundFloat, kStatic, kNeedsEnvironmentOrCache) \
   V(SystemArrayCopyChar, kStatic, kNeedsEnvironmentOrCache) \
+  V(SystemArrayCopy, kStatic, kNeedsEnvironmentOrCache) \
   V(ThreadCurrentThread, kStatic, kNeedsEnvironmentOrCache) \
   V(MemoryPeekByte, kStatic, kNeedsEnvironmentOrCache) \
   V(MemoryPeekIntNative, kStatic, kNeedsEnvironmentOrCache) \
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index b60905d..764a114 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -812,6 +812,7 @@
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 263c375..e83aebb 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2255,6 +2255,7 @@
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 1a13b69..fa1ff23 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -754,7 +754,7 @@
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kCallOnSlowPath,
                                                             kIntrinsified);
-  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
+  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
   locations->SetInAt(2, Location::RequiresRegister());
@@ -770,19 +770,27 @@
 static void CheckPosition(X86_64Assembler* assembler,
                           Location pos,
                           CpuRegister input,
-                          CpuRegister length,
+                          Location length,
                           SlowPathCode* slow_path,
                           CpuRegister input_len,
-                          CpuRegister temp) {
-  // Where is the length in the String?
+                          CpuRegister temp,
+                          bool length_is_input_length = false) {
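+  // When 'length_is_input_length' is true, the caller knows that 'length' is
+  // exactly length(input), so the range check can be simplified or skipped.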
+  // Where is the length in the Array?
   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
 
   if (pos.IsConstant()) {
     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
     if (pos_const == 0) {
-      // Check that length(input) >= length.
-      __ cmpl(Address(input, length_offset), length);
-      __ j(kLess, slow_path->GetEntryLabel());
+      if (!length_is_input_length) {
+        // Check that length(input) >= length.
+        if (length.IsConstant()) {
+          __ cmpl(Address(input, length_offset),
+                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+        } else {
+          __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
+        }
+        __ j(kLess, slow_path->GetEntryLabel());
+      }
     } else {
       // Check that length(input) >= pos.
       __ movl(input_len, Address(input, length_offset));
@@ -791,9 +799,18 @@
 
       // Check that (length(input) - pos) >= length.
       __ leal(temp, Address(input_len, -pos_const));
-      __ cmpl(temp, length);
+      if (length.IsConstant()) {
+        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+      } else {
+        __ cmpl(temp, length.AsRegister<CpuRegister>());
+      }
       __ j(kLess, slow_path->GetEntryLabel());
     }
+  } else if (length_is_input_length) {
+    // The only way the copy can succeed is if pos is zero.
+    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
+    __ testl(pos_reg, pos_reg);
+    __ j(kNotEqual, slow_path->GetEntryLabel());
   } else {
     // Check that pos >= 0.
     CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
@@ -807,7 +824,11 @@
     // Check that (length(input) - pos) >= length.
     __ movl(temp, Address(input, length_offset));
     __ subl(temp, pos_reg);
-    __ cmpl(temp, length);
+    if (length.IsConstant()) {
+      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      __ cmpl(temp, length.AsRegister<CpuRegister>());
+    }
     __ j(kLess, slow_path->GetEntryLabel());
   }
 }
@@ -817,9 +838,9 @@
   LocationSummary* locations = invoke->GetLocations();
 
   CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
-  Location srcPos = locations->InAt(1);
+  Location src_pos = locations->InAt(1);
   CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
-  Location destPos = locations->InAt(3);
+  Location dest_pos = locations->InAt(3);
   Location length = locations->InAt(4);
 
   // Temporaries that we need for MOVSW.
@@ -852,6 +873,12 @@
     __ j(kLess, slow_path->GetEntryLabel());
   }
 
+  // Validity checks: source.
+  CheckPosition(assembler, src_pos, src, length, slow_path, src_base, dest_base);
+
+  // Validity checks: dest.
+  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base, dest_base);
+
   // We need the count in RCX.
   if (length.IsConstant()) {
     __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
@@ -859,12 +886,6 @@
     __ movl(count, length.AsRegister<CpuRegister>());
   }
 
-  // Validity checks: source.
-  CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base);
-
-  // Validity checks: dest.
-  CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base);
-
   // Okay, everything checks out.  Finally time to do the copy.
   // Check assumption that sizeof(Char) is 2 (used in scaling below).
   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
@@ -872,18 +893,18 @@
 
   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
 
-  if (srcPos.IsConstant()) {
-    int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
-    __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(src_base, Address(src, char_size * src_pos_const + data_offset));
   } else {
-    __ leal(src_base, Address(src, srcPos.AsRegister<CpuRegister>(),
+    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(),
                               ScaleFactor::TIMES_2, data_offset));
   }
-  if (destPos.IsConstant()) {
-    int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
-    __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
+  if (dest_pos.IsConstant()) {
+    int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(dest_base, Address(dest, char_size * dest_pos_const + data_offset));
   } else {
-    __ leal(dest_base, Address(dest, destPos.AsRegister<CpuRegister>(),
+    __ leal(dest_base, Address(dest, dest_pos.AsRegister<CpuRegister>(),
                                ScaleFactor::TIMES_2, data_offset));
   }
 
@@ -893,6 +914,277 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+
+void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // Check for known failures that will force us to bail out to the runtime;
+  // in those cases, just generate the runtime call directly.
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+
+  // The positions must be non-negative.
+  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
+      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
+    // We will have to fail anyway.
+    return;
+  }
+
+  // The length must be >= 0.
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+  if (length != nullptr) {
+    int32_t len = length->GetValue();
+    if (len < 0) {
+      // Just call as normal.
+      return;
+    }
+  }
+
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  if (optimizations.GetDestinationIsSource()) {
+    if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
+      // We only support backward copying if source and destination are the same.
+      return;
+    }
+  }
+
+  if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
+    // We currently don't intrinsify primitive copying.
+    return;
+  }
+
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
+  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
+
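+  // Three temporaries: used by the code generator for the base source address,
+  // the base destination address, and the end source address (the first two
+  // are also reused for the type checks).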
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
+  X86_64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+
+  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
+  Location src_pos = locations->InAt(1);
+  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
+  Location dest_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
+  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
+  CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  NearLabel ok;
+  SystemArrayCopyOptimizations optimizations(invoke);
+
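+  // Compare src and dest up front; the resulting flags are consumed by the
+  // kNotEqual jumps below, which skip the overlap check when src != dest.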
+  if (!optimizations.GetDestinationIsSource()) {
+    __ cmpl(src, dest);
+  }
+
+  // If source and destination are the same, we go to the slow path if we
+  // would need to copy forward (dest_pos > src_pos), since the copy loop
+  // below would overwrite source elements before reading them.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    if (dest_pos.IsConstant()) {
+      // Checked when building locations.
+      DCHECK(!optimizations.GetDestinationIsSource()
+             || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ j(kNotEqual, &ok);
+      }
+      __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
+      __ j(kGreater, slow_path->GetEntryLabel());
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ j(kNotEqual, &ok);
+    }
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
+      __ j(kLess, slow_path->GetEntryLabel());
+    } else {
+      __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
+      __ j(kLess, slow_path->GetEntryLabel());
+    }
+  }
+
+  __ Bind(&ok);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ testl(src, src);
+    __ j(kEqual, slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ testl(dest, dest);
+    __ j(kEqual, slow_path->GetEntryLabel());
+  }
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
+    __ j(kLess, slow_path->GetEntryLabel());
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler,
+                src_pos,
+                src,
+                length,
+                slow_path,
+                temp1,
+                temp2,
+                optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckPosition(assembler,
+                dest_pos,
+                dest,
+                length,
+                slow_path,
+                temp1,
+                temp2,
+                optimizations.GetCountIsDestinationLength());
+
+  if (!optimizations.GetDoesNotNeedTypeCheck()) {
+    // Check whether all elements of the source array are assignable to the component
+    // type of the destination array. We do two checks: the classes are the same,
+    // or the destination is Object[]. If none of these checks succeed, we go to the
+    // slow path.
+    __ movl(temp1, Address(dest, class_offset));
+    __ movl(temp2, Address(src, class_offset));
+    bool did_unpoison = false;
+    if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+        !optimizations.GetSourceIsNonPrimitiveArray()) {
+      // One or two of the references need to be unpoisoned. Unpoison them
+      // both to make the identity check valid.
+      __ MaybeUnpoisonHeapReference(temp1);
+      __ MaybeUnpoisonHeapReference(temp2);
+      did_unpoison = true;
+    }
+
+    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+      // Bail out if the destination is not a non-primitive array.
+      __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+      __ testl(CpuRegister(TMP), CpuRegister(TMP));
+      __ j(kEqual, slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+    }
+
+    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+      // Bail out if the source is not a non-primitive array.
+      __ movl(CpuRegister(TMP), Address(temp2, component_offset));
+      __ testl(CpuRegister(TMP), CpuRegister(TMP));
+      __ j(kEqual, slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+    }
+
+    __ cmpl(temp1, temp2);
+
+    if (optimizations.GetDestinationIsTypedObjectArray()) {
+      NearLabel do_copy;
+      __ j(kEqual, &do_copy);
+      if (!did_unpoison) {
+        __ MaybeUnpoisonHeapReference(temp1);
+      }
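+      // Load the destination's component type, then its superclass. Only
+      // java.lang.Object has a null superclass, so a null result means the
+      // destination really is Object[].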
+      __ movl(temp1, Address(temp1, component_offset));
+      __ MaybeUnpoisonHeapReference(temp1);
+      __ movl(temp1, Address(temp1, super_offset));
+      // No need to unpoison the result; we're comparing against null.
+      __ testl(temp1, temp1);
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ Bind(&do_copy);
+    } else {
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+    }
+  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+    // Bail out if the source is not a non-primitive array.
+    __ movl(temp1, Address(src, class_offset));
+    __ MaybeUnpoisonHeapReference(temp1);
+    __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+    __ testl(CpuRegister(TMP), CpuRegister(TMP));
+    __ j(kEqual, slow_path->GetEntryLabel());
+    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+    __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  }
+
+  // Compute base source address, base destination address, and end source address.
+
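+  // Heap references are 32-bit in ART, even on 64-bit targets, hence the
+  // int32_t element size and the TIMES_4 scaling below.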
+  uint32_t element_size = sizeof(int32_t);
+  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+  if (src_pos.IsConstant()) {
+    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(temp1, Address(src, element_size * constant + offset));
+  } else {
+    __ leal(temp1, Address(src, src_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
+  }
+
+  if (dest_pos.IsConstant()) {
+    int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(temp2, Address(dest, element_size * constant + offset));
+  } else {
+    __ leal(temp2, Address(dest, dest_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
+  }
+
+  if (length.IsConstant()) {
+    int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(temp3, Address(temp1, element_size * constant));
+  } else {
+    __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
+  }
+
+  // Iterate over the arrays and do a raw copy of the objects. We don't need
+  // to poison/unpoison or insert read barriers, as the next uses of the
+  // destination array will do that.
+  NearLabel loop, done;
+  __ cmpl(temp1, temp3);
+  __ j(kEqual, &done);
+  __ Bind(&loop);
+  __ movl(CpuRegister(TMP), Address(temp1, 0));
+  __ movl(Address(temp2, 0), CpuRegister(TMP));
+  __ addl(temp1, Immediate(element_size));
+  __ addl(temp2, Immediate(element_size));
+  __ cmpl(temp1, temp3);
+  __ j(kNotEqual, &loop);
+  __ Bind(&done);
+
+  // We only need one card marking on the destination array.
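+  // Passing kNoRegister and 'false' marks the card unconditionally rather
+  // than null-checking a particular stored value first.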
+  codegen_->MarkGCCard(temp1,
+                       temp2,
+                       dest,
+                       CpuRegister(kNoRegister),
+                       false);
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kCall,
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 95dd039..282a483 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1657,7 +1657,8 @@
   }
 
   bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return IsValid() && GetTypeHandle()->IsStringClass();
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsStringClass();
   }
 
   bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -1671,15 +1672,36 @@
   }
 
   bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
     return GetTypeHandle()->IsArrayClass();
   }
 
+  bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsPrimitiveArray();
+  }
+
+  bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
+  }
+
   bool CanArrayHold(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
     if (!IsExact()) return false;
     if (!IsArrayClass()) return false;
     return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get());
   }
 
+  bool CanArrayHoldValuesOf(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    if (!IsExact()) return false;
+    if (!IsArrayClass()) return false;
+    if (!rti.IsArrayClass()) return false;
+    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(
+        rti.GetTypeHandle()->GetComponentType());
+  }
+
   Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
 
   bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -4413,7 +4435,7 @@
 
 class HArrayLength : public HExpression<1> {
  public:
-  explicit HArrayLength(HInstruction* array, uint32_t dex_pc)
+  HArrayLength(HInstruction* array, uint32_t dex_pc)
       : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
     // Note that arrays do not change length, so the instruction does not
     // depend on any write.
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 64c2249..837662d 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -74,6 +74,7 @@
   kIntrinsicUnsafeGet,
   kIntrinsicUnsafePut,
   kIntrinsicSystemArrayCopyCharArray,
+  kIntrinsicSystemArrayCopy,
 
   kInlineOpNop,
   kInlineOpReturnArg,
diff --git a/test/537-checker-arraycopy/expected.txt b/test/537-checker-arraycopy/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/537-checker-arraycopy/expected.txt
diff --git a/test/537-checker-arraycopy/info.txt b/test/537-checker-arraycopy/info.txt
new file mode 100644
index 0000000..ea88f89
--- /dev/null
+++ b/test/537-checker-arraycopy/info.txt
@@ -0,0 +1 @@
+Test for edge cases of System.arraycopy.
diff --git a/test/537-checker-arraycopy/src/Main.java b/test/537-checker-arraycopy/src/Main.java
new file mode 100644
index 0000000..30ccc56
--- /dev/null
+++ b/test/537-checker-arraycopy/src/Main.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public class Main {
+  public static void main(String[] args) {
+    arraycopy();
+    try {
+      arraycopy(new Object());
+      throw new Error("Should not be here");
+    } catch (ArrayStoreException ase) {
+      // Ignore.
+    }
+    try {
+      arraycopy(null);
+      throw new Error("Should not be here");
+    } catch (NullPointerException npe) {
+      // Ignore.
+    }
+
+    try {
+      arraycopy(new Object[1]);
+      throw new Error("Should not be here");
+    } catch (ArrayIndexOutOfBoundsException aiooe) {
+      // Ignore.
+    }
+
+    arraycopy(new Object[2]);
+    arraycopy(new Object[2], 0);
+
+    try {
+      arraycopy(new Object[1], 1);
+      throw new Error("Should not be here");
+    } catch (ArrayIndexOutOfBoundsException aiooe) {
+      // Ignore.
+    }
+  }
+
+  /// CHECK-START-X86_64: void Main.arraycopy() disassembly (after)
+  /// CHECK:          InvokeStaticOrDirect
+  /// CHECK-NOT:      test
+  /// CHECK-NOT:      call
+  /// CHECK:          ReturnVoid
+  // Checks that the call is intrinsified and that there is no test instruction
+  // when we know the source and destination are not null.
+  public static void arraycopy() {
+    Object[] obj = new Object[4];
+    System.arraycopy(obj, 1, obj, 0, 1);
+  }
+
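+  // Copying with a plain Object argument must still throw at runtime:
+  // ArrayStoreException for a non-array, NullPointerException for null.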
+  public static void arraycopy(Object obj) {
+    System.arraycopy(obj, 1, obj, 0, 1);
+  }
+
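+  // Copies the whole array; an out-of-bounds 'pos' must throw
+  // ArrayIndexOutOfBoundsException.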
+  public static void arraycopy(Object[] obj, int pos) {
+    System.arraycopy(obj, pos, obj, 0, obj.length);
+  }
+}