diff options
author | 2024-06-27 17:46:39 +0500 | |
---|---|---|
committer | 2024-07-17 12:37:07 +0000 | |
commit | 57fe2140f6a78a66cf80c9b46e96acc6b83ec3ed (patch) | |
tree | 535f5a91d38c0c203dd969f02765ee5e6617b943 | |
parent | 91c9502fdf437fce5596631e50db5e70c034312a (diff) |
riscv64: Extend Shl+Add optimization for many Adds
Extend instruction_simplifier_riscv64 so that it is no longer limited to
a Shl<1|2|3> whose only non-environment use is a single Add.
Replace every Add use of Shl<1|2|3> with Riscv64ShiftAdd, even if the Shl
has other uses. If the Shl has no uses left afterwards, remove it.
Even when the Shl has uses other than Adds, this extension still gives
a gain: sh<1|2|3>add takes one clock cycle, so two independent
shifts (e.g. slli and sh<1|2|3>add) can execute in parallel on a
dual-issue processor.
Compiling all the methods of the applications below with dex2oat gave the
following increase in cases (pattern detections) of the optimization:
Facebook: +20% (+9) cases
TikTok: +15% (+4) cases
Test: art/test/testrunner/testrunner.py --target --64 --ndebug --optimizing
Change-Id: I2c194ad74f1b8c60f8c926894c389af61333ec28
-rw-r--r-- | compiler/optimizing/instruction_simplifier_riscv64.cc | 74 | ||||
-rw-r--r-- | test/458-checker-riscv64-shift-add/src/Main.java | 131 |
2 files changed, 178 insertions, 27 deletions
diff --git a/compiler/optimizing/instruction_simplifier_riscv64.cc b/compiler/optimizing/instruction_simplifier_riscv64.cc index 8f47f66d8b..b12941b37d 100644 --- a/compiler/optimizing/instruction_simplifier_riscv64.cc +++ b/compiler/optimizing/instruction_simplifier_riscv64.cc @@ -41,51 +41,60 @@ class InstructionSimplifierRiscv64Visitor final : public HGraphVisitor { } } - bool TryReplaceShiftAddWithOneInstruction(HShl* shl, HAdd* add) { + // Replace Add which has Shl with distance of 1 or 2 or 3 with Riscv64ShiftAdd + bool TryReplaceAddsWithShiftAdds(HShl* shl) { // There is no reason to replace Int32 Shl+Add with ShiftAdd because of // additional sign-extension required. if (shl->GetType() != DataType::Type::kInt64) { return false; } - if (!shl->GetRight()->IsIntConstant()) { + if (!shl->GetRight()->IsConstant()) { return false; } + // The bytecode does not permit the shift distance to come from a wide variable + DCHECK(shl->GetRight()->IsIntConstant()); + const int32_t distance = shl->GetRight()->AsIntConstant()->GetValue(); - if (distance != 1 && distance != 2 && distance != 3) { + if ((distance & ~0x3) != 0) { return false; } - if (!shl->HasOnlyOneNonEnvironmentUse()) { - return false; - } + bool replaced = false; - auto* const add_other_input = add->GetLeft() == shl ? 
add->GetRight() : add->GetLeft(); - auto* const shift_add = new (GetGraph()->GetAllocator()) - HRiscv64ShiftAdd(shl->GetLeft(), add_other_input, distance); + for (const HUseListNode<HInstruction*>& use : shl->GetUses()) { + HInstruction* user = use.GetUser(); - DCHECK_EQ(add->GetType(), DataType::Type::kInt64) - << "Riscv64ShiftAdd replacement should have the same 64 bit type"; - add->GetBlock()->ReplaceAndRemoveInstructionWith(add, shift_add); - shl->GetBlock()->RemoveInstruction(shl); + if (!user->IsAdd()) { + continue; + } + HAdd* add = user->AsAdd(); + HInstruction* left = add->GetLeft(); + HInstruction* right = add->GetRight(); + DCHECK_EQ(add->GetType(), DataType::Type::kInt64) + << "Replaceable Add must be the same 64 bit type as the input"; + + // If the HAdd to replace has both inputs the same HShl<1|2|3>, then + // don't perform the optimization. The processor will not be able to execute + // these shifts parallel which is the purpose of the replace below. + if (left == right) { + continue; + } - return true; - } + HInstruction* add_other_input = left == shl ? 
right : left; + HRiscv64ShiftAdd* shift_add = new (GetGraph()->GetAllocator()) + HRiscv64ShiftAdd(shl->GetLeft(), add_other_input, distance); - // Replace code looking like - // SHL tmp, a, 1 or 2 or 3 - // ADD dst, tmp, b - // with - // Riscv64ShiftAdd dst, a, b - void VisitAdd(HAdd* add) override { - auto* const left = add->GetLeft(); - auto* const right = add->GetRight(); - if (left->IsShl() && TryReplaceShiftAddWithOneInstruction(left->AsShl(), add)) { - return; - } else if (right->IsShl() && TryReplaceShiftAddWithOneInstruction(right->AsShl(), add)) { - return; + add->GetBlock()->ReplaceAndRemoveInstructionWith(add, shift_add); + replaced = true; } + + if (!shl->HasUses()) { + shl->GetBlock()->RemoveInstruction(shl); + } + + return replaced; } void VisitAnd(HAnd* inst) override { @@ -100,6 +109,17 @@ class InstructionSimplifierRiscv64Visitor final : public HGraphVisitor { } } + // Replace code looking like + // SHL tmp, a, 1 or 2 or 3 + // ADD dst, tmp, b + // with + // Riscv64ShiftAdd dst, a, b + void VisitShl(HShl* inst) override { + if (TryReplaceAddsWithShiftAdds(inst)) { + RecordSimplification(); + } + } + void VisitSub(HSub* inst) override { if (TryMergeWithAnd(inst)) { RecordSimplification(); diff --git a/test/458-checker-riscv64-shift-add/src/Main.java b/test/458-checker-riscv64-shift-add/src/Main.java index b78b9551b8..f000c0ce4e 100644 --- a/test/458-checker-riscv64-shift-add/src/Main.java +++ b/test/458-checker-riscv64-shift-add/src/Main.java @@ -207,6 +207,20 @@ public class Main { return (a << 1) + (b << 4); } + /// CHECK-START-RISCV64: long Main.$noinline$longShiftTooLittleDistance(long, long) instruction_simplifier_riscv64 (after) + /// CHECK-NOT: Riscv64ShiftAdd + + public static long $noinline$longShiftTooLittleDistance(long a, long b) { + return (a << 0) + b; + } + + /// CHECK-START-RISCV64: long Main.$noinline$longShiftTooGreatDistance(long, long) instruction_simplifier_riscv64 (after) + /// CHECK-NOT: Riscv64ShiftAdd + + public static long 
$noinline$longShiftTooGreatDistance(long a, long b) { + return (a << 4) + b; + } + /// CHECK-START-RISCV64: long Main.$noinline$longTwoSimplifications(long, long) instruction_simplifier_riscv64 (before) /// CHECK: <<A:j\d+>> ParameterValue /// CHECK: <<B:j\d+>> ParameterValue @@ -234,6 +248,99 @@ public class Main { return x ^ y; } + /// CHECK-START-RISCV64: long Main.$noinline$longTwoAddsUseShl(long, long, long) instruction_simplifier_riscv64 (before) + /// CHECK: <<A:j\d+>> ParameterValue + /// CHECK: <<B:j\d+>> ParameterValue + /// CHECK: <<C:j\d+>> ParameterValue + /// CHECK: <<One:i\d+>> IntConstant 1 + /// CHECK: <<Shl:j\d+>> Shl [<<A>>,<<One>>] + /// CHECK: <<X:j\d+>> Add [<<B>>,<<Shl>>] + /// CHECK: <<Y:j\d+>> Add [<<C>>,<<Shl>>] + + /// CHECK-START-RISCV64: long Main.$noinline$longTwoAddsUseShl(long, long, long) instruction_simplifier_riscv64 (after) + /// CHECK: <<A:j\d+>> ParameterValue + /// CHECK: <<B:j\d+>> ParameterValue + /// CHECK: <<C:j\d+>> ParameterValue + /// CHECK-DAG: <<ShAdd1:j\d+>> Riscv64ShiftAdd [<<A>>,<<B>>] distance:1 + /// CHECK-DAG: <<ShAdd2:j\d+>> Riscv64ShiftAdd [<<A>>,<<C>>] distance:1 + + /// CHECK-START-RISCV64: long Main.$noinline$longTwoAddsUseShl(long, long, long) instruction_simplifier_riscv64 (after) + /// CHECK-NOT: Shl + + /// CHECK-START-RISCV64: long Main.$noinline$longTwoAddsUseShl(long, long, long) instruction_simplifier_riscv64 (after) + /// CHECK-NOT: Add + + public static long $noinline$longTwoAddsUseShl(long a, long b, long c) { + long shl = a << 1; + long x = shl + b; + long y = shl + c; + return x ^ y; + } + + /// CHECK-START-RISCV64: long Main.$noinline$longTwoAddsMixedOrderUseShl(long, long, long) instruction_simplifier_riscv64 (after) + /// CHECK-DAG: Riscv64ShiftAdd + /// CHECK-DAG: Riscv64ShiftAdd + + /// CHECK-START-RISCV64: long Main.$noinline$longTwoAddsMixedOrderUseShl(long, long, long) instruction_simplifier_riscv64 (after) + /// CHECK-NOT: Shl + + /// CHECK-START-RISCV64: long 
Main.$noinline$longTwoAddsMixedOrderUseShl(long, long, long) instruction_simplifier_riscv64 (after) + /// CHECK-NOT: Add + + public static long $noinline$longTwoAddsMixedOrderUseShl(long a, long b, long c) { + long x = (a << 1) + b; + long y = c + (a << 1); + return x ^ y; + } + + /// CHECK-START-RISCV64: long Main.$noinline$longOneAddSharesShlUse(long, long, long) instruction_simplifier_riscv64 (before) + /// CHECK: <<A:j\d+>> ParameterValue + /// CHECK: <<B:j\d+>> ParameterValue + /// CHECK: <<C:j\d+>> ParameterValue + /// CHECK: <<One:i\d+>> IntConstant 1 + /// CHECK: <<Shl:j\d+>> Shl [<<A>>,<<One>>] + /// CHECK: <<X:j\d+>> Add [<<B>>,<<Shl>>] + /// CHECK: <<Y:j\d+>> Sub [<<Shl>>,<<C>>] + + /// CHECK-START-RISCV64: long Main.$noinline$longOneAddSharesShlUse(long, long, long) instruction_simplifier_riscv64 (after) + /// CHECK: <<A:j\d+>> ParameterValue + /// CHECK: <<B:j\d+>> ParameterValue + /// CHECK: <<C:j\d+>> ParameterValue + /// CHECK: <<One:i\d+>> IntConstant 1 + /// CHECK: <<Shl:j\d+>> Shl [<<A>>,<<One>>] + /// CHECK-DAG: <<ShAdd:j\d+>> Riscv64ShiftAdd [<<A>>,<<B>>] distance:1 + /// CHECK: <<Y:j\d+>> Sub [<<Shl>>,<<C>>] + + /// CHECK-START-RISCV64: long Main.$noinline$longOneAddSharesShlUse(long, long, long) instruction_simplifier_riscv64 (after) + /// CHECK-NOT: Add + + public static long $noinline$longOneAddSharesShlUse(long a, long b, long c) { + long shl = a << 1; + long x = shl + b; + long y = shl - c; + return x ^ y; + } + + public static void $noinline$returnVoid(long a) {} + + /// CHECK-START-RISCV64: long Main.$noinline$longOneAddSharesShlEnvironmentUse(long, long) instruction_simplifier_riscv64 (after) + /// CHECK: Shl + /// CHECK-DAG: Riscv64ShiftAdd + + public static long $noinline$longOneAddSharesShlEnvironmentUse(long a, long b) { + long shl = a << 1; + long x = shl + b; + $noinline$returnVoid(shl); + return x; + } + + /// CHECK-START-RISCV64: long Main.$noinline$longTwoTheSameShl(long) instruction_simplifier_riscv64 (after) + /// CHECK-NOT: 
Riscv64ShiftAdd + + public static long $noinline$longTwoTheSameShl(long a) { + return (a << 1) + (a << 1); + } + public static void main(String[] args) { assertIntEquals(0, $noinline$intRiscvShift1Add(0, 0)); @@ -270,6 +377,30 @@ public class Main { assertLongEquals(2L, $noinline$longLeftShift(1L, 0L)); assertLongEquals(4L, $noinline$longLeftShift(2L, 0L)); + assertLongEquals(1L, $noinline$longShiftTooLittleDistance(1L, 0L)); + assertLongEquals(2L, $noinline$longShiftTooLittleDistance(2L, 0L)); + + assertLongEquals(17L, $noinline$longShiftTooGreatDistance(1L, 1L)); + assertLongEquals(32L, $noinline$longShiftTooGreatDistance(2L, 0L)); + assertLongEquals(6L, $noinline$longTwoSimplifications(1L, 1L)); + + assertLongEquals(0L, $noinline$longTwoAddsUseShl(1L, 1L, 1L)); + assertLongEquals(1L, $noinline$longTwoAddsUseShl(1L, 0L, 1L)); + assertLongEquals(3L, $noinline$longTwoAddsUseShl(0L, 1L, 2L)); + assertLongEquals(7L, $noinline$longTwoAddsUseShl(1L, 2L, 1L)); + + assertLongEquals(0L, $noinline$longTwoAddsMixedOrderUseShl(1L, 1L, 1L)); + assertLongEquals(1L, $noinline$longTwoAddsMixedOrderUseShl(1L, 0L, 1L)); + assertLongEquals(3L, $noinline$longTwoAddsMixedOrderUseShl(0L, 1L, 2L)); + + assertLongEquals(2L, $noinline$longOneAddSharesShlUse(1L, 1L, 1L)); + assertLongEquals(3L, $noinline$longOneAddSharesShlUse(1L, 0L, 1L)); + assertLongEquals(-1L, $noinline$longOneAddSharesShlUse(0L, 1L, 2L)); + assertLongEquals(5L, $noinline$longOneAddSharesShlUse(1L, 2L, 1L)); + + assertLongEquals(3L, $noinline$longOneAddSharesShlEnvironmentUse(1L, 1L)); + + assertLongEquals(4L, $noinline$longTwoTheSameShl(1L)); } } |