ART: Transform Sub+Sub into Sub+Add to merge Shl

In the instruction sequence like the following:
  t1 = Shl(a, n)
  t2 = Sub(t1, *)
  r  = Sub(*, t2)
Shl cannot be merged with Sub. However it can be done when the first Sub
operands are reordered and the second Sub is replaced with Add:
  t1 = Shl(a, n)
  t2 = Sub(*, t1)
  r  = Add(*, t2)

This CL implements this transformation in the ARM/ARM64 instruction simplifiers.

Test: 411-checker-instruct-simplifier-hrem
Test: test.py --host --optimizing --jit --gtest --interpreter
Test: test.py --target --optimizing --jit --interpreter
Test: run-gtests.sh
Change-Id: I24fde29d307f3ad53a8df8bbafe945b4f733ce6c
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index 01e9cff..1371ea7 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -27,6 +27,7 @@
 
 using helpers::CanFitInShifterOperand;
 using helpers::HasShifterOperand;
+using helpers::IsSubRightSubLeftShl;
 
 namespace arm {
 
@@ -73,6 +74,7 @@
   void VisitOr(HOr* instruction) override;
   void VisitShl(HShl* instruction) override;
   void VisitShr(HShr* instruction) override;
+  void VisitSub(HSub* instruction) override;
   void VisitTypeConversion(HTypeConversion* instruction) override;
   void VisitUShr(HUShr* instruction) override;
 
@@ -268,6 +270,15 @@
   }
 }
 
+void InstructionSimplifierArmVisitor::VisitSub(HSub* instruction) {
+  if (IsSubRightSubLeftShl(instruction)) {
+    HInstruction* shl = instruction->GetRight()->InputAt(0);
+    if (shl->InputAt(1)->IsConstant() && TryReplaceSubSubWithSubAdd(instruction)) {
+      TryMergeIntoUsersShifterOperand(shl);
+    }
+  }
+}
+
 void InstructionSimplifierArmVisitor::VisitTypeConversion(HTypeConversion* instruction) {
   DataType::Type result_type = instruction->GetResultType();
   DataType::Type input_type = instruction->GetInputType();
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index e23decb..260bfaf 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -25,6 +25,7 @@
 
 using helpers::CanFitInShifterOperand;
 using helpers::HasShifterOperand;
+using helpers::IsSubRightSubLeftShl;
 
 namespace arm64 {
 
@@ -76,6 +77,7 @@
   void VisitOr(HOr* instruction) override;
   void VisitShl(HShl* instruction) override;
   void VisitShr(HShr* instruction) override;
+  void VisitSub(HSub* instruction) override;
   void VisitTypeConversion(HTypeConversion* instruction) override;
   void VisitUShr(HUShr* instruction) override;
   void VisitXor(HXor* instruction) override;
@@ -239,6 +241,15 @@
   }
 }
 
+void InstructionSimplifierArm64Visitor::VisitSub(HSub* instruction) {
+  if (IsSubRightSubLeftShl(instruction)) {
+    HInstruction* shl = instruction->GetRight()->InputAt(0);
+    if (shl->InputAt(1)->IsConstant() && TryReplaceSubSubWithSubAdd(instruction)) {
+      TryMergeIntoUsersShifterOperand(shl);
+    }
+  }
+}
+
 void InstructionSimplifierArm64Visitor::VisitTypeConversion(HTypeConversion* instruction) {
   DataType::Type result_type = instruction->GetResultType();
   DataType::Type input_type = instruction->GetInputType();
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 0f30f66..dc60ba6 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -336,4 +336,26 @@
   return true;
 }
 
+bool TryReplaceSubSubWithSubAdd(HSub* last_sub) {
+  DCHECK(last_sub->GetRight()->IsSub());
+  HBasicBlock* basic_block = last_sub->GetBlock();
+  ArenaAllocator* allocator = basic_block->GetGraph()->GetAllocator();
+  HInstruction* last_sub_right = last_sub->GetRight();
+  HInstruction* last_sub_left = last_sub->GetLeft();
+  if (last_sub_right->GetUses().HasExactlyOneElement()) {
+    // Reorder operands of last_sub_right: Sub(a, b) -> Sub(b, a).
+    HInstruction* a = last_sub_right->InputAt(0);
+    HInstruction* b = last_sub_right->InputAt(1);
+    last_sub_right->ReplaceInput(b, 0);
+    last_sub_right->ReplaceInput(a, 1);
+
+    // Replace Sub(c, Sub(a, b)) with Add(c, Sub(b, a).
+    HAdd* add = new (allocator) HAdd(last_sub->GetType(), last_sub_left, last_sub_right);
+    basic_block->ReplaceAndRemoveInstructionWith(last_sub, add);
+    return true;
+  } else {
+    return false;
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 758fc76..876ed21 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -47,6 +47,15 @@
   return res;
 }
 
+// Check the specified sub is the last operation of the sequence:
+//   t1 = Shl
+//   t2 = Sub(t1, *)
+//   t3 = Sub(*, t2)
+inline bool IsSubRightSubLeftShl(HSub *sub) {
+  HInstruction* right = sub->GetRight();
+  return right->IsSub() && right->AsSub()->GetLeft()->IsShl();;
+}
+
 }  // namespace helpers
 
 bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa);
@@ -61,6 +70,12 @@
 
 bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index);
 
+// Try to replace
+//   Sub(c, Sub(a, b))
+// with
+//   Add(c, Sub(b, a))
+bool TryReplaceSubSubWithSubAdd(HSub* last_sub);
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
diff --git a/test/411-checker-instruct-simplifier-hrem/src/Main.java b/test/411-checker-instruct-simplifier-hrem/src/Main.java
index 6684557..e6853dd 100644
--- a/test/411-checker-instruct-simplifier-hrem/src/Main.java
+++ b/test/411-checker-instruct-simplifier-hrem/src/Main.java
@@ -206,12 +206,27 @@
   /// CHECK-NEXT:      Sub
   /// CHECK-NEXT:      Sub
   //
+  /// CHECK-START-ARM64: long Main.$noinline$IntDivRemBy7(int) instruction_simplifier_arm64 (after)
+  /// CHECK:                 Div
+  /// CHECK-NEXT:            DataProcWithShifterOp
+  /// CHECK-NEXT:            Add
+  //
   /// CHECK-START-ARM64: long Main.$noinline$IntDivRemBy7(int) disassembly (after)
   /// CHECK:                 asr x{{\d+}}, x{{\d+}}, #34
   /// CHECK-NEXT:            cinc w{{\d+}}, w{{\d+}}, mi
-  /// CHECK:                 lsl w{{\d+}}, w{{\d+}}, #3
-  /// CHECK:                 sub w{{\d+}}, w{{\d+}}, w{{\d+}}
-  /// CHECK:                 sub w{{\d+}}, w{{\d+}}, w{{\d+}}
+  /// CHECK:                 sub w{{\d+}}, w{{\d+}}, w{{\d+}}, lsl #3
+  /// CHECK:                 add w{{\d+}}, w{{\d+}}, w{{\d+}}
+  //
+  /// CHECK-START-ARM: long Main.$noinline$IntDivRemBy7(int) instruction_simplifier_arm (after)
+  /// CHECK:               Div
+  /// CHECK-NEXT:          DataProcWithShifterOp
+  /// CHECK-NEXT:          Add
+  //
+  /// CHECK-START-ARM: long Main.$noinline$IntDivRemBy7(int) disassembly (after)
+  /// CHECK:               asr{{s?}} r{{\d+}}, #2
+  /// CHECK-NEXT:          sub       r{{\d+}}, r{{\d+}}, r{{\d+}}, asr #31
+  /// CHECK:               sub       r{{\d+}}, r{{\d+}}, r{{\d+}}, lsl #3
+  /// CHECK:               add{{s?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
   private static long $noinline$IntDivRemBy7(int v) {
     int q = v / 7;
     int r = v % 7;
@@ -245,7 +260,7 @@
   //
   /// CHECK-START: long Main.$noinline$IntDivRemByMaxInt(int) instruction_simplifier (before)
   /// CHECK:           Div
-  /// CHECK:           Rem
+  /// CHECK-NEXT:      Rem
   //
   /// CHECK-START: long Main.$noinline$IntDivRemByMaxInt(int) instruction_simplifier (after)
   /// CHECK:           Div
@@ -253,12 +268,27 @@
   /// CHECK-NEXT:      Sub
   /// CHECK-NEXT:      Sub
   //
+  /// CHECK-START-ARM64: long Main.$noinline$IntDivRemByMaxInt(int) instruction_simplifier_arm64 (after)
+  /// CHECK:                 Div
+  /// CHECK-NEXT:            DataProcWithShifterOp
+  /// CHECK-NEXT:            Add
+  //
   /// CHECK-START-ARM64: long Main.$noinline$IntDivRemByMaxInt(int) disassembly (after)
   /// CHECK:                 asr x{{\d+}}, x{{\d+}}, #61
   /// CHECK-NEXT:            add w{{\d+}}, w{{\d+}}, w{{\d+}}, lsr #31
-  /// CHECK:                 lsl w{{\d+}}, w{{\d+}}, #31
-  /// CHECK:                 sub w{{\d+}}, w{{\d+}}, w{{\d+}}
-  /// CHECK:                 sub w{{\d+}}, w{{\d+}}, w{{\d+}}
+  /// CHECK:                 sub w{{\d+}}, w{{\d+}}, w{{\d+}}, lsl #31
+  /// CHECK:                 add w{{\d+}}, w{{\d+}}, w{{\d+}}
+  //
+  /// CHECK-START-ARM: long Main.$noinline$IntDivRemByMaxInt(int) instruction_simplifier_arm (after)
+  /// CHECK:               Div
+  /// CHECK-NEXT:          DataProcWithShifterOp
+  /// CHECK-NEXT:          Add
+  //
+  /// CHECK-START-ARM: long Main.$noinline$IntDivRemByMaxInt(int) disassembly (after)
+  /// CHECK:               asr{{s?}}  r{{\d+}}, #29
+  /// CHECK-NEXT:          sub        r{{\d+}}, r{{\d+}}, r{{\d+}}, asr #31
+  /// CHECK:               sub        r{{\d+}}, r{{\d+}}, r{{\d+}}, lsl #31
+  /// CHECK:               add{{s?}}  r{{\d+}}, r{{\d+}}, r{{\d+}}
   private static long $noinline$IntDivRemByMaxInt(int v) {
     int q = v / Integer.MAX_VALUE;
     int r = v % Integer.MAX_VALUE;
@@ -590,12 +620,30 @@
   /// CHECK-NEXT:      Sub
   /// CHECK-NEXT:      Sub
   //
+  /// CHECK-START-ARM64: long[] Main.$noinline$LongDivRemBy7(long) instruction_simplifier_arm64 (after)
+  /// CHECK:                 Div
+  /// CHECK-NEXT:            DataProcWithShifterOp
+  /// CHECK-NEXT:            Add
+  //
   /// CHECK-START-ARM64: long[] Main.$noinline$LongDivRemBy7(long) disassembly (after)
   /// CHECK:                 asr x{{\d+}}, x{{\d+}}, #1
   /// CHECK-NEXT:            add x{{\d+}}, x{{\d+}}, x{{\d+}}, lsr #63
-  /// CHECK:                 lsl x{{\d+}}, x{{\d+}}, #3
-  /// CHECK:                 sub x{{\d+}}, x{{\d+}}, x{{\d+}}
-  /// CHECK:                 sub x{{\d+}}, x{{\d+}}, x{{\d+}}
+  /// CHECK:                 sub x{{\d+}}, x{{\d+}}, x{{\d+}}, lsl #3
+  /// CHECK:                 add x{{\d+}}, x{{\d+}}, x{{\d+}}
+  //
+  /// CHECK-START-ARM: long[] Main.$noinline$LongDivRemBy7(long) instruction_simplifier_arm (after)
+  /// CHECK:               Div
+  /// CHECK-NEXT:          DataProcWithShifterOp
+  /// CHECK-NEXT:          Add
+  //
+  /// CHECK-START-ARM: long[] Main.$noinline$LongDivRemBy7(long) disassembly (after)
+  /// CHECK:               blx lr
+  //  CHECK:               lsl ip, r{{\d}}, #3
+  //  CHECK-NEXT:          orr       ip, r{{\d}}, lsr #29
+  //  CHECK-NEXT:          sub       r{{\d}}, r{{\d}}, r{{\d}}, lsl #3
+  //  CHECK-NEXT:          sbc{{s?}} r{{\d}}, r{{\d}}, ip
+  /// CHECK:               add{{s?}} r{{\d}}, r{{\d}}
+  /// CHECK-NEXT:          adc{{s?}} r{{\d}}, r{{\d}}
   private static long[] $noinline$LongDivRemBy7(long v) {
     long q = v / 7L;
     long r = v % 7L;
@@ -637,12 +685,28 @@
   /// CHECK-NEXT:      Sub
   /// CHECK-NEXT:      Sub
   //
+  /// CHECK-START-ARM64: long[] Main.$noinline$LongDivRemByMaxLong(long) instruction_simplifier_arm64 (after)
+  /// CHECK:           Div
+  /// CHECK-NEXT:      DataProcWithShifterOp
+  /// CHECK-NEXT:      Add
+  //
   /// CHECK-START-ARM64: long[] Main.$noinline$LongDivRemByMaxLong(long) disassembly (after)
   /// CHECK:                 asr x{{\d+}}, x{{\d+}}, #61
   /// CHECK-NEXT:            add x{{\d+}}, x{{\d+}}, x{{\d+}}, lsr #63
-  /// CHECK:                 lsl x{{\d+}}, x{{\d+}}, #63
-  /// CHECK:                 sub x{{\d+}}, x{{\d+}}, x{{\d+}}
-  /// CHECK:                 sub x{{\d+}}, x{{\d+}}, x{{\d+}}
+  /// CHECK:                 sub x{{\d+}}, x{{\d+}}, x{{\d+}}, lsl #63
+  /// CHECK:                 add x{{\d+}}, x{{\d+}}, x{{\d+}}
+  //
+  /// CHECK-START-ARM: long[] Main.$noinline$LongDivRemByMaxLong(long) instruction_simplifier_arm (after)
+  /// CHECK:           Div
+  /// CHECK-NEXT:      DataProcWithShifterOp
+  /// CHECK-NEXT:      Add
+  //
+  /// CHECK-START-ARM: long[] Main.$noinline$LongDivRemByMaxLong(long) disassembly (after)
+  /// CHECK:               blx lr
+  //  CHECK:               sub       r{{\d}}, r{{\d}}, r{{\d}}, lsl #31
+  //  CHECK-NEXT:          mov{{s?}} r{{\d}}, r{{\d}}
+  /// CHECK:               add{{s?}} r{{\d}}, r{{\d}}
+  /// CHECK-NEXT:          adc{{s?}} r{{\d}}, r{{\d}}
   private static long[] $noinline$LongDivRemByMaxLong(long v) {
     long q = v / Long.MAX_VALUE;
     long r = v % Long.MAX_VALUE;