Revert^2 "Improve ArraySet codegen."

This reverts commit 0dda8c84938d6bb4ce5a1707e5e109ea187fc33d.

The original change had two issues that have been fixed.
First, for heap poisoning, the null branch skipped over the
reference poisoning instructions which copy and poison the
value, thus writing whatever was left in the register.
Second, the change erroneously assumed that the slow path
performed only the assignability check and bound the slow
path exit label before the actual array store, unnecessarily
re-doing the store.

Change-Id: I9f380efa12aa807b4f566a932dbc9dae824fb25a
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_taimen-userdebug boots.
Test: testrunner.py --target --optimizing
Test: Repeat the above with
      ART_USE_READ_BARRIER=false ART_HEAP_POISONING=true
Bug: 32489401
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 28a7583..177d982 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2497,12 +2497,10 @@
 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
   DataType::Type value_type = instruction->GetComponentType();
 
-  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool needs_type_check = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
       instruction,
-      may_need_runtime_call_for_type_check ?
-          LocationSummary::kCallOnSlowPath :
-          LocationSummary::kNoCall);
+      needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
@@ -2517,7 +2515,7 @@
 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
   DataType::Type value_type = instruction->GetComponentType();
   LocationSummary* locations = instruction->GetLocations();
-  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool needs_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
@@ -2530,7 +2528,7 @@
   MacroAssembler* masm = GetVIXLAssembler();
 
   if (!needs_write_barrier) {
-    DCHECK(!may_need_runtime_call_for_type_check);
+    DCHECK(!needs_type_check);
     if (index.IsConstant()) {
       offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
       destination = HeapOperand(array, offset);
@@ -2562,123 +2560,105 @@
     }
   } else {
     DCHECK(!instruction->GetArray()->IsIntermediateAddress());
-    vixl::aarch64::Label done;
+
+    bool can_value_be_null = instruction->GetValueCanBeNull();
+    vixl::aarch64::Label do_store;
+    if (can_value_be_null) {
+      __ Cbz(Register(value), &do_store);
+    }
+
     SlowPathCodeARM64* slow_path = nullptr;
-    {
-      // We use a block to end the scratch scope before the write barrier, thus
-      // freeing the temporary registers so they can be used in `MarkGCCard`.
+    if (needs_type_check) {
+      slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction);
+      codegen_->AddSlowPath(slow_path);
+
+      const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+      const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+      const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
-      if (index.IsConstant()) {
-        offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
-        destination = HeapOperand(array, offset);
-      } else {
-        destination = HeapOperand(temp,
-                                  XRegisterFrom(index),
-                                  LSL,
-                                  DataType::SizeShift(value_type));
+      Register temp2 = temps.AcquireSameSizeAs(array);
+
+      // Note that when Baker read barriers are enabled, the type
+      // checks are performed without read barriers.  This is fine,
+      // even in the case where a class object is in the from-space
+      // after the flip, as a comparison involving such a type would
+      // not produce a false positive; it may of course produce a
+      // false negative, in which case we would take the ArraySet
+      // slow path.
+
+      // /* HeapReference<Class> */ temp = array->klass_
+      {
+        // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
+        EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+        __ Ldr(temp, HeapOperand(array, class_offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
+      GetAssembler()->MaybeUnpoisonHeapReference(temp);
 
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+      // /* HeapReference<Class> */ temp = temp->component_type_
+      __ Ldr(temp, HeapOperand(temp, component_offset));
+      // /* HeapReference<Class> */ temp2 = value->klass_
+      __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+      // If heap poisoning is enabled, no need to unpoison `temp`
+      // nor `temp2`, as we are comparing two poisoned references.
+      __ Cmp(temp, temp2);
 
-      if (may_need_runtime_call_for_type_check) {
-        slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction);
-        codegen_->AddSlowPath(slow_path);
-        if (instruction->GetValueCanBeNull()) {
-          vixl::aarch64::Label non_zero;
-          __ Cbnz(Register(value), &non_zero);
-          if (!index.IsConstant()) {
-            __ Add(temp, array, offset);
-          }
-          {
-            // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools
-            // emitted.
-            EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
-            __ Str(wzr, destination);
-            codegen_->MaybeRecordImplicitNullCheck(instruction);
-          }
-          __ B(&done);
-          __ Bind(&non_zero);
-        }
-
-        // Note that when Baker read barriers are enabled, the type
-        // checks are performed without read barriers.  This is fine,
-        // even in the case where a class object is in the from-space
-        // after the flip, as a comparison involving such a type would
-        // not produce a false positive; it may of course produce a
-        // false negative, in which case we would take the ArraySet
-        // slow path.
-
-        Register temp2 = temps.AcquireSameSizeAs(array);
-        // /* HeapReference<Class> */ temp = array->klass_
-        {
-          // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
-          EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
-          __ Ldr(temp, HeapOperand(array, class_offset));
-          codegen_->MaybeRecordImplicitNullCheck(instruction);
-        }
+      if (instruction->StaticTypeOfArrayIsObjectArray()) {
+        vixl::aarch64::Label do_put;
+        __ B(eq, &do_put);
+        // If heap poisoning is enabled, the `temp` reference has
+        // not been unpoisoned yet; unpoison it now.
         GetAssembler()->MaybeUnpoisonHeapReference(temp);
 
-        // /* HeapReference<Class> */ temp = temp->component_type_
-        __ Ldr(temp, HeapOperand(temp, component_offset));
-        // /* HeapReference<Class> */ temp2 = value->klass_
-        __ Ldr(temp2, HeapOperand(Register(value), class_offset));
-        // If heap poisoning is enabled, no need to unpoison `temp`
-        // nor `temp2`, as we are comparing two poisoned references.
-        __ Cmp(temp, temp2);
-        temps.Release(temp2);
-
-        if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          vixl::aarch64::Label do_put;
-          __ B(eq, &do_put);
-          // If heap poisoning is enabled, the `temp` reference has
-          // not been unpoisoned yet; unpoison it now.
-          GetAssembler()->MaybeUnpoisonHeapReference(temp);
-
-          // /* HeapReference<Class> */ temp = temp->super_class_
-          __ Ldr(temp, HeapOperand(temp, super_offset));
-          // If heap poisoning is enabled, no need to unpoison
-          // `temp`, as we are comparing against null below.
-          __ Cbnz(temp, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
-        } else {
-          __ B(ne, slow_path->GetEntryLabel());
-        }
-      }
-
-      if (kPoisonHeapReferences) {
-        Register temp2 = temps.AcquireSameSizeAs(array);
-          DCHECK(value.IsW());
-        __ Mov(temp2, value.W());
-        GetAssembler()->PoisonHeapReference(temp2);
-        source = temp2;
-      }
-
-      if (!index.IsConstant()) {
-        __ Add(temp, array, offset);
+        // /* HeapReference<Class> */ temp = temp->super_class_
+        __ Ldr(temp, HeapOperand(temp, super_offset));
+        // If heap poisoning is enabled, no need to unpoison
+        // `temp`, as we are comparing against null below.
+        __ Cbnz(temp, slow_path->GetEntryLabel());
+        __ Bind(&do_put);
       } else {
-        // We no longer need the `temp` here so release it as the store below may
-        // need a scratch register (if the constant index makes the offset too large)
-        // and the poisoned `source` could be using the other scratch register.
-        temps.Release(temp);
-      }
-      {
-        // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
-        EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
-        __ Str(source, destination);
-
-        if (!may_need_runtime_call_for_type_check) {
-          codegen_->MaybeRecordImplicitNullCheck(instruction);
-        }
+        __ B(ne, slow_path->GetEntryLabel());
       }
     }
 
-    codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull());
+    codegen_->MarkGCCard(array, value.W(), /* value_can_be_null= */ false);
 
-    if (done.IsLinked()) {
-      __ Bind(&done);
+    if (can_value_be_null) {
+      DCHECK(do_store.IsLinked());
+      __ Bind(&do_store);
+    }
+
+    UseScratchRegisterScope temps(masm);
+    if (kPoisonHeapReferences) {
+      Register temp_source = temps.AcquireSameSizeAs(array);
+        DCHECK(value.IsW());
+      __ Mov(temp_source, value.W());
+      GetAssembler()->PoisonHeapReference(temp_source);
+      source = temp_source;
+    }
+
+    if (index.IsConstant()) {
+      offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
+      destination = HeapOperand(array, offset);
+    } else {
+      Register temp_base = temps.AcquireSameSizeAs(array);
+      __ Add(temp_base, array, offset);
+      destination = HeapOperand(temp_base,
+                                XRegisterFrom(index),
+                                LSL,
+                                DataType::SizeShift(value_type));
+    }
+
+    {
+      // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
+      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+      __ Str(source, destination);
+
+      if (can_value_be_null || !needs_type_check) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
     }
 
     if (slow_path != nullptr) {