MIPS32R2: Share address computation

For array accesses the element address has the following structure:
Address = CONST_OFFSET + base_addr + index << ELEM_SHIFT

The address part (index << ELEM_SHIFT) can be shared across array
accesses with the same data type and index.

For example, in the following loop 5 accesses can share address
computation:

void foo(int[] a, int[] b, int[] c) {
  for (i...) {
    a[i] = a[i] + 5;
    b[i] = b[i] + c[i];
  }
}

Test: test-art-host, test-art-target
Change-Id: Id09fa782934aad4ee47669275e7e1a4d7d23b0fa
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 3044890..59ca4c7 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -135,6 +135,7 @@
                 "linker/mips/relative_patcher_mips.cc",
                 "optimizing/code_generator_mips.cc",
                 "optimizing/code_generator_vector_mips.cc",
+                "optimizing/instruction_simplifier_mips.cc",
                 "optimizing/intrinsics_mips.cc",
                 "optimizing/pc_relative_fixups_mips.cc",
                 "utils/mips/assembler_mips.cc",
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 70c8a5b..3c592e7 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -2665,6 +2665,9 @@
           __ ShiftAndAdd(TMP, index_reg, obj, TIMES_2, TMP);
           __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
           __ Bind(&done);
+        } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) {
+          __ Addu(TMP, index_reg, obj);
+          __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker);
         } else {
           __ ShiftAndAdd(TMP, index_reg, obj, TIMES_2, TMP);
           __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker);
@@ -2679,6 +2682,9 @@
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
         __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker);
+      } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) {
+        __ Addu(TMP, index.AsRegister<Register>(), obj);
+        __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
       } else {
         __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_2, TMP);
         __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
@@ -2693,6 +2699,9 @@
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
         __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker);
+      } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) {
+        __ Addu(TMP, index.AsRegister<Register>(), obj);
+        __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
       } else {
         __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_4, TMP);
         __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
@@ -2766,6 +2775,9 @@
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
         __ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
+      } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) {
+        __ Addu(TMP, index.AsRegister<Register>(), obj);
+        __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
       } else {
         __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_8, TMP);
         __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
@@ -2779,6 +2791,9 @@
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
         __ LoadSFromOffset(out, obj, offset, null_checker);
+      } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) {
+        __ Addu(TMP, index.AsRegister<Register>(), obj);
+        __ LoadSFromOffset(out, TMP, data_offset, null_checker);
       } else {
         __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_4, TMP);
         __ LoadSFromOffset(out, TMP, data_offset, null_checker);
@@ -2792,6 +2807,9 @@
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
         __ LoadDFromOffset(out, obj, offset, null_checker);
+      } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) {
+        __ Addu(TMP, index.AsRegister<Register>(), obj);
+        __ LoadDFromOffset(out, TMP, data_offset, null_checker);
       } else {
         __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_8, TMP);
         __ LoadDFromOffset(out, TMP, data_offset, null_checker);
@@ -2906,6 +2924,8 @@
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       if (index.IsConstant()) {
         data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2;
+      } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) {
+        __ Addu(base_reg, index.AsRegister<Register>(), obj);
       } else {
         __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_2, base_reg);
       }
@@ -2923,6 +2943,8 @@
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       if (index.IsConstant()) {
         data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+      } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) {
+        __ Addu(base_reg, index.AsRegister<Register>(), obj);
       } else {
         __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg);
       }
@@ -2972,6 +2994,8 @@
           uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
           if (index.IsConstant()) {
             data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+          } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) {
+            __ Addu(base_reg, index.AsRegister<Register>(), obj);
           } else {
             __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg);
           }
@@ -3055,6 +3079,8 @@
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       if (index.IsConstant()) {
         data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
+      } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) {
+        __ Addu(base_reg, index.AsRegister<Register>(), obj);
       } else {
         __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_8, base_reg);
       }
@@ -3072,6 +3098,8 @@
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       if (index.IsConstant()) {
         data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+      } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) {
+        __ Addu(base_reg, index.AsRegister<Register>(), obj);
       } else {
         __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg);
       }
@@ -3089,6 +3117,8 @@
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       if (index.IsConstant()) {
         data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
+      } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) {
+        __ Addu(base_reg, index.AsRegister<Register>(), obj);
       } else {
         __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_8, base_reg);
       }
@@ -3108,6 +3138,26 @@
   }
 }
 
+void LocationsBuilderMIPS::VisitIntermediateArrayAddressIndex(
+    HIntermediateArrayAddressIndex* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+
+  HIntConstant* shift = instruction->GetShift()->AsIntConstant();
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::ConstantLocation(shift));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorMIPS::VisitIntermediateArrayAddressIndex(
+    HIntermediateArrayAddressIndex* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register index_reg = locations->InAt(0).AsRegister<Register>();
+  uint32_t shift = instruction->GetShift()->AsIntConstant()->GetValue();
+  __ Sll(locations->Out().AsRegister<Register>(), index_reg, shift);
+}
+
 void LocationsBuilderMIPS::VisitBoundsCheck(HBoundsCheck* instruction) {
   RegisterSet caller_saves = RegisterSet::Empty();
   InvokeRuntimeCallingConvention calling_convention;
diff --git a/compiler/optimizing/instruction_simplifier_mips.cc b/compiler/optimizing/instruction_simplifier_mips.cc
new file mode 100644
index 0000000..4bf1bfb
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_mips.cc
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_mips.h"
+
+#include "arch/mips/instruction_set_features_mips.h"
+#include "mirror/array-inl.h"
+
+namespace art {
+namespace mips {
+
+class InstructionSimplifierMipsVisitor : public HGraphVisitor {
+ public:
+  InstructionSimplifierMipsVisitor(HGraph* graph,
+                                   CodeGenerator* codegen,
+                                   OptimizingCompilerStats* stats)
+      : HGraphVisitor(graph),
+        stats_(stats),
+        codegen_(down_cast<CodeGeneratorMIPS*>(codegen)) {}
+
+ private:
+  void RecordSimplification() {
+    if (stats_ != nullptr) {
+      stats_->RecordStat(kInstructionSimplificationsArch);
+    }
+  }
+
+  bool TryExtractArrayAccessIndex(HInstruction* access,
+                                  HInstruction* index,
+                                  DataType::Type packed_type);
+  void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
+  void VisitArraySet(HArraySet* instruction) OVERRIDE;
+
+  OptimizingCompilerStats* stats_;
+  CodeGeneratorMIPS* codegen_;
+};
+
+bool InstructionSimplifierMipsVisitor::TryExtractArrayAccessIndex(HInstruction* access,
+                                                                  HInstruction* index,
+                                                                  DataType::Type packed_type) {
+  if (codegen_->GetInstructionSetFeatures().IsR6() ||
+      codegen_->GetInstructionSetFeatures().HasMsa()) {
+    return false;
+  }
+  if (index->IsConstant() ||
+      (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
+    // If index is constant the whole address calculation often can be done by load/store
+    // instructions themselves.
+    // TODO: Treat the case with non-embeddable constants.
+    return false;
+  }
+
+  if (packed_type != DataType::Type::kInt16 && packed_type != DataType::Type::kUint16 &&
+      packed_type != DataType::Type::kInt32 && packed_type != DataType::Type::kInt64 &&
+      packed_type != DataType::Type::kFloat32 && packed_type != DataType::Type::kFloat64) {
+    return false;
+  }
+
+  if (access->IsArrayGet() && access->AsArrayGet()->IsStringCharAt()) {
+    return false;
+  }
+
+  HGraph* graph = access->GetBlock()->GetGraph();
+  ArenaAllocator* arena = graph->GetArena();
+  size_t component_shift = DataType::SizeShift(packed_type);
+
+  bool is_extracting_beneficial = false;
+  // It is beneficial to extract index intermediate address only if there are at least 2 users.
+  for (const HUseListNode<HInstruction*>& use : index->GetUses()) {
+    HInstruction* user = use.GetUser();
+    if (user->IsArrayGet() && user != access && !user->AsArrayGet()->IsStringCharAt()) {
+      HArrayGet* another_access = user->AsArrayGet();
+      DataType::Type another_packed_type = another_access->GetType();
+      size_t another_component_shift = DataType::SizeShift(another_packed_type);
+      if (another_component_shift == component_shift) {
+        is_extracting_beneficial = true;
+        break;
+      }
+    } else if (user->IsArraySet() && user != access) {
+      HArraySet* another_access = user->AsArraySet();
+      DataType::Type another_packed_type = another_access->GetType();
+      size_t another_component_shift = DataType::SizeShift(another_packed_type);
+      if (another_component_shift == component_shift) {
+        is_extracting_beneficial = true;
+        break;
+      }
+    } else if (user->IsIntermediateArrayAddressIndex()) {
+      HIntermediateArrayAddressIndex* another_access = user->AsIntermediateArrayAddressIndex();
+      size_t another_component_shift = another_access->GetShift()->AsIntConstant()->GetValue();
+      if (another_component_shift == component_shift) {
+        is_extracting_beneficial = true;
+        break;
+      }
+    }
+  }
+
+  if (!is_extracting_beneficial) {
+    return false;
+  }
+
+  HIntConstant* shift = graph->GetIntConstant(component_shift);
+  HIntermediateArrayAddressIndex* address =
+      new (arena) HIntermediateArrayAddressIndex(index, shift, kNoDexPc);
+  access->GetBlock()->InsertInstructionBefore(address, access);
+  access->ReplaceInput(address, 1);
+  return true;
+}
+
+void InstructionSimplifierMipsVisitor::VisitArrayGet(HArrayGet* instruction) {
+  DataType::Type packed_type = instruction->GetType();
+  if (TryExtractArrayAccessIndex(instruction, instruction->GetIndex(), packed_type)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierMipsVisitor::VisitArraySet(HArraySet* instruction) {
+  DataType::Type packed_type = instruction->GetComponentType();
+  if (TryExtractArrayAccessIndex(instruction, instruction->GetIndex(), packed_type)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierMips::Run() {
+  InstructionSimplifierMipsVisitor visitor(graph_, codegen_, stats_);
+  visitor.VisitReversePostOrder();
+}
+
+}  // namespace mips
+}  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_mips.h b/compiler/optimizing/instruction_simplifier_mips.h
new file mode 100644
index 0000000..22cc2ef
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_mips.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_MIPS_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_MIPS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+#include "code_generator_mips.h"
+
+namespace art {
+
+class CodeGenerator;
+
+namespace mips {
+
+class InstructionSimplifierMips : public HOptimization {
+ public:
+  InstructionSimplifierMips(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "instruction_simplifier_mips", stats),
+        codegen_(down_cast<CodeGeneratorMIPS*>(codegen)) {}
+
+  static constexpr const char* kInstructionSimplifierMipsPassName = "instruction_simplifier_mips";
+
+  void Run() OVERRIDE;
+
+ private:
+  CodeGeneratorMIPS* codegen_;
+};
+
+}  // namespace mips
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_MIPS_H_
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 411af2a..fef0c86 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1423,7 +1423,8 @@
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)                           \
   M(MipsComputeBaseMethodAddress, Instruction)                          \
-  M(MipsPackedSwitch, Instruction)
+  M(MipsPackedSwitch, Instruction)                                      \
+  M(IntermediateArrayAddressIndex, Instruction)
 #endif
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M)
diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h
index 80e652e..ef388c3 100644
--- a/compiler/optimizing/nodes_mips.h
+++ b/compiler/optimizing/nodes_mips.h
@@ -69,6 +69,46 @@
   DISALLOW_COPY_AND_ASSIGN(HMipsPackedSwitch);
 };
 
+// This instruction computes part of the array access offset (index offset).
+//
+// For array accesses the element address has the following structure:
+// Address = CONST_OFFSET + base_addr + index << ELEM_SHIFT. The address part
+// (index << ELEM_SHIFT) can be shared across array accesses with
+// the same data type and index. For example, in the following loop 5 accesses can share address
+// computation:
+//
+// void foo(int[] a, int[] b, int[] c) {
+//   for (i...) {
+//     a[i] = a[i] + 5;
+//     b[i] = b[i] + c[i];
+//   }
+// }
+//
+// Note: as the instruction doesn't involve base array address into computations it has no side
+// effects.
+class HIntermediateArrayAddressIndex FINAL : public HExpression<2> {
+ public:
+  HIntermediateArrayAddressIndex(HInstruction* index, HInstruction* shift, uint32_t dex_pc)
+      : HExpression(DataType::Type::kInt32, SideEffects::None(), dex_pc) {
+    SetRawInputAt(0, index);
+    SetRawInputAt(1, shift);
+  }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
+  bool IsActualObject() const OVERRIDE { return false; }
+
+  HInstruction* GetIndex() const { return InputAt(0); }
+  HInstruction* GetShift() const { return InputAt(1); }
+
+  DECLARE_INSTRUCTION(IntermediateArrayAddressIndex);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HIntermediateArrayAddressIndex);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_MIPS_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 1218586..1e06ea8 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -27,6 +27,7 @@
 #endif
 
 #ifdef ART_ENABLE_CODEGEN_mips
+#include "instruction_simplifier_mips.h"
 #include "pc_relative_fixups_mips.h"
 #endif
 
@@ -528,6 +529,8 @@
 #ifdef ART_ENABLE_CODEGEN_mips
   } else if (opt_name == mips::PcRelativeFixups::kPcRelativeFixupsMipsPassName) {
     return new (arena) mips::PcRelativeFixups(graph, codegen, stats);
+  } else if (opt_name == mips::InstructionSimplifierMips::kInstructionSimplifierMipsPassName) {
+    return new (arena) mips::InstructionSimplifierMips(graph, codegen, stats);
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
   } else if (opt_name == x86::PcRelativeFixups::kPcRelativeFixupsX86PassName) {
@@ -669,11 +672,14 @@
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips: {
+      mips::InstructionSimplifierMips* simplifier =
+          new (arena) mips::InstructionSimplifierMips(graph, codegen, stats);
       SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
       GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
       mips::PcRelativeFixups* pc_relative_fixups =
           new (arena) mips::PcRelativeFixups(graph, codegen, stats);
       HOptimization* mips_optimizations[] = {
+          simplifier,
           side_effects,
           gvn,
           pc_relative_fixups,