Optimize ArraySet for x86/x64/arm/arm64.

Change-Id: I5bc8c6adf7f82f3b211f0c21067f5bb54dd0c040
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 277f6b4..db4a854 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -380,6 +380,51 @@
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
 };
 
+class ArraySetSlowPathX86 : public SlowPathCode {  // Slow path that calls pAputObject.
+ public:
+  explicit ArraySetSlowPathX86(HInstruction* instruction) : instruction_(instruction) {}
+  // Emits the slow path: marshals the three inputs and invokes the pAputObject entrypoint.
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+    // Send inputs 0-2 to runtime argument registers 0-2 through a single parallel move.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    // Invoke the pAputObject entrypoint at the instruction's dex pc.
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());  // Resume at this slow path's exit label.
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86"; }
+
+ private:
+  HInstruction* const instruction_;  // Supplies the locations and dex pc used above.
+
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
+};
+
 #undef __
 #define __ down_cast<X86Assembler*>(GetAssembler())->
 
@@ -4245,72 +4290,59 @@
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
-  bool needs_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call = instruction->NeedsTypeCheck();
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
 
-  if (needs_runtime_call) {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  bool is_byte_type = (value_type == Primitive::kPrimBoolean)
+      || (value_type == Primitive::kPrimByte);
+  // We need the inputs to be different than the output in case of long operation.
+  // In case of a byte operation, the register allocator does not support multiple
+  // inputs that die at entry with one in a specific register.
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (is_byte_type) {
+    // Ensure the value is in a byte register.
+    locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
+  } else if (Primitive::IsFloatingPointType(value_type)) {
+    locations->SetInAt(2, Location::RequiresFpuRegister());
   } else {
-    bool is_byte_type = (value_type == Primitive::kPrimBoolean)
-        || (value_type == Primitive::kPrimByte);
-    // We need the inputs to be different than the output in case of long operation.
-    // In case of a byte operation, the register allocator does not support multiple
-    // inputs that die at entry with one in a specific register.
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    if (is_byte_type) {
-      // Ensure the value is in a byte register.
-      locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
-    } else if (Primitive::IsFloatingPointType(value_type)) {
-      locations->SetInAt(2, Location::RequiresFpuRegister());
-    } else {
-      locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
-    }
-    if (needs_write_barrier) {
-      // Temporary registers for the write barrier.
-      locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
-      // Ensure the card is in a byte register.
-      locations->AddTemp(Location::RegisterLocation(ECX));
-    }
+    locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
+  }
+  if (needs_write_barrier) {
+    // Temporary registers for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
+    // Ensure the card is in a byte register.
+    locations->AddTemp(Location::RegisterLocation(ECX));
   }
 }
 
 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register array = locations->InAt(0).AsRegister<Register>();
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool needs_runtime_call = locations->WillCall();
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  bool may_need_runtime_call = locations->CanCall();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        if (value.IsRegister()) {
-          __ movb(Address(obj, offset), value.AsRegister<ByteRegister>());
-        } else {
-          __ movb(Address(obj, offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_1, offset);
+      if (value.IsRegister()) {
+        __ movb(address, value.AsRegister<ByteRegister>());
       } else {
-        if (value.IsRegister()) {
-          __ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset),
-                  value.AsRegister<ByteRegister>());
-        } else {
-          __ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+        __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
       }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
@@ -4318,93 +4350,106 @@
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        if (value.IsRegister()) {
-          __ movw(Address(obj, offset), value.AsRegister<Register>());
-        } else {
-          __ movw(Address(obj, offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_2, offset);
+      if (value.IsRegister()) {
+        __ movw(address, value.AsRegister<Register>());
       } else {
-        if (value.IsRegister()) {
-          __ movw(Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset),
-                  value.AsRegister<Register>());
-        } else {
-          __ movw(Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+        __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
       }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
-    case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      if (!needs_runtime_call) {
-        uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-        if (index.IsConstant()) {
-          size_t offset =
-              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-          if (value.IsRegister()) {
-            if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
-              Register temp = locations->GetTemp(0).AsRegister<Register>();
-              __ movl(temp, value.AsRegister<Register>());
-              __ PoisonHeapReference(temp);
-              __ movl(Address(obj, offset), temp);
-            } else {
-              __ movl(Address(obj, offset), value.AsRegister<Register>());
-            }
-          } else {
-            DCHECK(value.IsConstant()) << value;
-            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
-            // `value_type == Primitive::kPrimNot` implies `v == 0`.
-            DCHECK((value_type != Primitive::kPrimNot) || (v == 0));
-            // Note: if heap poisoning is enabled, no need to poison
-            // (negate) `v` if it is a reference, as it would be null.
-            __ movl(Address(obj, offset), Immediate(v));
-          }
-        } else {
-          DCHECK(index.IsRegister()) << index;
-          if (value.IsRegister()) {
-            if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
-              Register temp = locations->GetTemp(0).AsRegister<Register>();
-              __ movl(temp, value.AsRegister<Register>());
-              __ PoisonHeapReference(temp);
-              __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset), temp);
-            } else {
-              __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset),
-                      value.AsRegister<Register>());
-            }
-          } else {
-            DCHECK(value.IsConstant()) << value;
-            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
-            // `value_type == Primitive::kPrimNot` implies `v == 0`.
-            DCHECK((value_type != Primitive::kPrimNot) || (v == 0));
-            // Note: if heap poisoning is enabled, no need to poison
-            // (negate) `v` if it is a reference, as it would be null.
-            __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset), Immediate(v));
-          }
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
+      if (!value.IsRegister()) {
+        // Just setting null.
+        DCHECK(instruction->InputAt(2)->IsNullConstant());
+        DCHECK(value.IsConstant()) << value;
+        __ movl(address, Immediate(0));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-
-        if (needs_write_barrier) {
-          Register temp = locations->GetTemp(0).AsRegister<Register>();
-          Register card = locations->GetTemp(1).AsRegister<Register>();
-          codegen_->MarkGCCard(
-              temp, card, obj, value.AsRegister<Register>(), instruction->GetValueCanBeNull());
-        }
-      } else {
-        DCHECK_EQ(value_type, Primitive::kPrimNot);
-        DCHECK(!codegen_->IsLeafMethod());
-        // Note: if heap poisoning is enabled, pAputObject takes cares
-        // of poisoning the reference.
-        codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
-                                instruction,
-                                instruction->GetDexPc(),
-                                nullptr);
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call);
+        break;
       }
+
+      DCHECK(needs_write_barrier);
+      Register register_value = value.AsRegister<Register>();
+      NearLabel done, not_null, do_put;
+      SlowPathCode* slow_path = nullptr;
+      Register temp = locations->GetTemp(0).AsRegister<Register>();
+      if (may_need_runtime_call) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          __ testl(register_value, register_value);
+          __ j(kNotEqual, &not_null);
+          __ movl(address, Immediate(0));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ jmp(&done);
+          __ Bind(&not_null);
+        }
+
+        __ movl(temp, Address(array, class_offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ MaybeUnpoisonHeapReference(temp);
+        __ movl(temp, Address(temp, component_offset));
+        // No need to poison/unpoison, we're comparing two poisoned references.
+        __ cmpl(temp, Address(register_value, class_offset));
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          __ j(kEqual, &do_put);
+          __ MaybeUnpoisonHeapReference(temp);
+          __ movl(temp, Address(temp, super_offset));
+          // No need to unpoison, we're comparing against null.
+          __ testl(temp, temp);
+          __ j(kNotEqual, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ j(kNotEqual, slow_path->GetEntryLabel());
+        }
+      }
+
+      if (kPoisonHeapReferences) {
+        __ movl(temp, register_value);
+        __ PoisonHeapReference(temp);
+        __ movl(address, temp);
+      } else {
+        __ movl(address, register_value);
+      }
+      if (!may_need_runtime_call) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+
+      Register card = locations->GetTemp(1).AsRegister<Register>();
+      codegen_->MarkGCCard(
+          temp, card, array, value.AsRegister<Register>(), instruction->GetValueCanBeNull());
+      __ Bind(&done);
+
+      if (slow_path != nullptr) {
+        __ Bind(slow_path->GetExitLabel());
+      }
+
+      break;
+    }
+    case Primitive::kPrimInt: {
+      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
+      if (value.IsRegister()) {
+        __ movl(address, value.AsRegister<Register>());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(address, Immediate(v));
+      }
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
@@ -4413,30 +4458,30 @@
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
         if (value.IsRegisterPair()) {
-          __ movl(Address(obj, offset), value.AsRegisterPairLow<Register>());
+          __ movl(Address(array, offset), value.AsRegisterPairLow<Register>());
           codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ movl(Address(obj, offset + kX86WordSize), value.AsRegisterPairHigh<Register>());
+          __ movl(Address(array, offset + kX86WordSize), value.AsRegisterPairHigh<Register>());
         } else {
           DCHECK(value.IsConstant());
           int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
-          __ movl(Address(obj, offset), Immediate(Low32Bits(val)));
+          __ movl(Address(array, offset), Immediate(Low32Bits(val)));
           codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ movl(Address(obj, offset + kX86WordSize), Immediate(High32Bits(val)));
+          __ movl(Address(array, offset + kX86WordSize), Immediate(High32Bits(val)));
         }
       } else {
         if (value.IsRegisterPair()) {
-          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset),
+          __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset),
                   value.AsRegisterPairLow<Register>());
           codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
+          __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
                   value.AsRegisterPairHigh<Register>());
         } else {
           DCHECK(value.IsConstant());
           int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
-          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset),
+          __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset),
                   Immediate(Low32Bits(val)));
           codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
+          __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
                   Immediate(High32Bits(val)));
         }
       }
@@ -4444,28 +4489,22 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
       DCHECK(value.IsFpuRegister());
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ movss(Address(obj, offset), value.AsFpuRegister<XmmRegister>());
-      } else {
-        __ movss(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset),
-                value.AsFpuRegister<XmmRegister>());
-      }
+      __ movss(address, value.AsFpuRegister<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_8, offset);
       DCHECK(value.IsFpuRegister());
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ movsd(Address(obj, offset), value.AsFpuRegister<XmmRegister>());
-      } else {
-        __ movsd(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset),
-                value.AsFpuRegister<XmmRegister>());
-      }
+      __ movsd(address, value.AsFpuRegister<XmmRegister>());
       break;
     }