Revert "Revert "ARM/ARM64: Extend support of instruction combining.""
This reverts commit 6b5afdd144d2bb3bf994240797834b5666b2cf98.
Change-Id: Ic27a10f02e21109503edd64e6d73d1bb0c6a8ac6
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 2cbafea..7a257b6 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -142,7 +142,9 @@
jni/quick/arm64/calling_convention_arm64.cc \
linker/arm64/relative_patcher_arm64.cc \
optimizing/code_generator_arm64.cc \
+ optimizing/instruction_simplifier_arm.cc \
optimizing/instruction_simplifier_arm64.cc \
+ optimizing/instruction_simplifier_shared.cc \
optimizing/intrinsics_arm64.cc \
utils/arm64/assembler_arm64.cc \
utils/arm64/managed_register_arm64.cc \
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index cdbb9c3..10d3426 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -6413,6 +6413,33 @@
return DeduplicateMethodLiteral(target_method, &call_patches_);
}
+void LocationsBuilderARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
+ locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
+ Location::RequiresRegister());
+ locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+ locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+ LocationSummary* locations = instr->GetLocations();
+ Register res = locations->Out().AsRegister<Register>();
+ Register accumulator =
+ locations->InAt(HMultiplyAccumulate::kInputAccumulatorIndex).AsRegister<Register>();
+ Register mul_left =
+ locations->InAt(HMultiplyAccumulate::kInputMulLeftIndex).AsRegister<Register>();
+ Register mul_right =
+ locations->InAt(HMultiplyAccumulate::kInputMulRightIndex).AsRegister<Register>();
+
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ mla(res, mul_left, mul_right, accumulator);
+ } else {
+ __ mls(res, mul_left, mul_right, accumulator);
+ }
+}
+
void LocationsBuilderARM::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 2e4dc1e..06e7c00 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -159,6 +159,7 @@
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
@@ -197,6 +198,7 @@
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 814f8b4..beb75f0 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1959,21 +1959,27 @@
Operand(InputOperandAt(instruction, 1)));
}
-void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
+void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
- locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex,
- Location::RequiresRegister());
- locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
- locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+ HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
+ if (instr->GetOpKind() == HInstruction::kSub &&
+ accumulator->IsConstant() &&
+ accumulator->AsConstant()->IsZero()) {
+ // Don't allocate register for Mneg instruction.
+ } else {
+ locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
+ Location::RequiresRegister());
+ }
+ locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+ locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
-void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
+void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
Register res = OutputRegister(instr);
- Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex);
- Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex);
- Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex);
+ Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
+ Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
// Avoid emitting code that could trigger Cortex A53's erratum 835769.
// This fixup should be carried out for all multiply-accumulate instructions:
@@ -1993,10 +1999,17 @@
}
if (instr->GetOpKind() == HInstruction::kAdd) {
+ Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
__ Madd(res, mul_left, mul_right, accumulator);
} else {
DCHECK(instr->GetOpKind() == HInstruction::kSub);
- __ Msub(res, mul_left, mul_right, accumulator);
+ HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
+ if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsZero()) {
+ __ Mneg(res, mul_left, mul_right);
+ } else {
+ Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
+ __ Msub(res, mul_left, mul_right, accumulator);
+ }
}
}
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 3527261..10f1e7f 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -196,6 +196,7 @@
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
@@ -285,6 +286,7 @@
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index c0263e4..a3d6bcf 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -436,6 +436,12 @@
StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
}
+#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
+ void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
+ StartAttributeStream("kind") << instruction->GetOpKind();
+ }
+#endif
+
#ifdef ART_ENABLE_CODEGEN_arm64
void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE {
StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
@@ -443,10 +449,6 @@
StartAttributeStream("shift") << instruction->GetShiftAmount();
}
}
-
- void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE {
- StartAttributeStream("kind") << instruction->GetOpKind();
- }
#endif
bool IsPass(const char* name) {
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
new file mode 100644
index 0000000..db1f9a7
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_arm.h"
+#include "instruction_simplifier_shared.h"
+
+namespace art {
+namespace arm {
+
+void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) {
+ if (TryCombineMultiplyAccumulate(instruction, kArm)) {
+ RecordSimplification();
+ }
+}
+
+} // namespace arm
+} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
new file mode 100644
index 0000000..379b95d
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+namespace arm {
+
+class InstructionSimplifierArmVisitor : public HGraphVisitor {
+ public:
+ InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+ : HGraphVisitor(graph), stats_(stats) {}
+
+ private:
+ void RecordSimplification() {
+ if (stats_ != nullptr) {
+ stats_->RecordStat(kInstructionSimplificationsArch);
+ }
+ }
+
+ void VisitMul(HMul* instruction) OVERRIDE;
+
+ OptimizingCompilerStats* stats_;
+};
+
+
+class InstructionSimplifierArm : public HOptimization {
+ public:
+ InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats)
+ : HOptimization(graph, "instruction_simplifier_arm", stats) {}
+
+ void Run() OVERRIDE {
+ InstructionSimplifierArmVisitor visitor(graph_, stats_);
+ visitor.VisitReversePostOrder();
+ }
+};
+
+} // namespace arm
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 4bcfc54..83126a5 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -17,6 +17,7 @@
#include "instruction_simplifier_arm64.h"
#include "common_arm64.h"
+#include "instruction_simplifier_shared.h"
#include "mirror/array-inl.h"
namespace art {
@@ -179,67 +180,6 @@
return true;
}
-bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns(
- HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) {
- DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
- DCHECK(input_binop->IsAdd() || input_binop->IsSub());
- DCHECK_NE(input_binop, input_other);
- if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
- return false;
- }
-
- // Try to interpret patterns like
- // a * (b <+/-> 1)
- // as
- // (a * b) <+/-> a
- HInstruction* input_a = input_other;
- HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize.
- HInstruction::InstructionKind op_kind;
-
- if (input_binop->IsAdd()) {
- if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
- // Interpret
- // a * (b + 1)
- // as
- // (a * b) + a
- input_b = input_binop->GetLeastConstantLeft();
- op_kind = HInstruction::kAdd;
- }
- } else {
- DCHECK(input_binop->IsSub());
- if (input_binop->GetRight()->IsConstant() &&
- input_binop->GetRight()->AsConstant()->IsMinusOne()) {
- // Interpret
- // a * (b - (-1))
- // as
- // a + (a * b)
- input_b = input_binop->GetLeft();
- op_kind = HInstruction::kAdd;
- } else if (input_binop->GetLeft()->IsConstant() &&
- input_binop->GetLeft()->AsConstant()->IsOne()) {
- // Interpret
- // a * (1 - b)
- // as
- // a - (a * b)
- input_b = input_binop->GetRight();
- op_kind = HInstruction::kSub;
- }
- }
-
- if (input_b == nullptr) {
- // We did not find a pattern we can optimize.
- return false;
- }
-
- HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate(
- mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());
-
- mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
- input_binop->GetBlock()->RemoveInstruction(input_binop);
-
- return false;
-}
-
void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
TryExtractArrayAccessAddress(instruction,
instruction->GetArray(),
@@ -255,75 +195,8 @@
}
void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
- Primitive::Type type = instruction->GetType();
- if (!Primitive::IsIntOrLongType(type)) {
- return;
- }
-
- HInstruction* use = instruction->HasNonEnvironmentUses()
- ? instruction->GetUses().GetFirst()->GetUser()
- : nullptr;
-
- if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) {
- // Replace code looking like
- // MUL tmp, x, y
- // SUB dst, acc, tmp
- // with
- // MULSUB dst, acc, x, y
- // Note that we do not want to (unconditionally) perform the merge when the
- // multiplication has multiple uses and it can be merged in all of them.
- // Multiple uses could happen on the same control-flow path, and we would
- // then increase the amount of work. In the future we could try to evaluate
- // whether all uses are on different control-flow paths (using dominance and
- // reverse-dominance information) and only perform the merge when they are.
- HInstruction* accumulator = nullptr;
- HBinaryOperation* binop = use->AsBinaryOperation();
- HInstruction* binop_left = binop->GetLeft();
- HInstruction* binop_right = binop->GetRight();
- // Be careful after GVN. This should not happen since the `HMul` has only
- // one use.
- DCHECK_NE(binop_left, binop_right);
- if (binop_right == instruction) {
- accumulator = binop_left;
- } else if (use->IsAdd()) {
- DCHECK_EQ(binop_left, instruction);
- accumulator = binop_right;
- }
-
- if (accumulator != nullptr) {
- HArm64MultiplyAccumulate* mulacc =
- new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type,
- binop->GetKind(),
- accumulator,
- instruction->GetLeft(),
- instruction->GetRight());
-
- binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
- DCHECK(!instruction->HasUses());
- instruction->GetBlock()->RemoveInstruction(instruction);
- RecordSimplification();
- return;
- }
- }
-
- // Use multiply accumulate instruction for a few simple patterns.
- // We prefer not applying the following transformations if the left and
- // right inputs perform the same operation.
- // We rely on GVN having squashed the inputs if appropriate. However the
- // results are still correct even if that did not happen.
- if (instruction->GetLeft() == instruction->GetRight()) {
- return;
- }
-
- HInstruction* left = instruction->GetLeft();
- HInstruction* right = instruction->GetRight();
- if ((right->IsAdd() || right->IsSub()) &&
- TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) {
- return;
- }
- if ((left->IsAdd() || left->IsSub()) &&
- TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) {
- return;
+ if (TryCombineMultiplyAccumulate(instruction, kArm64)) {
+ RecordSimplification();
}
}
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index b7f490b..37a34c0 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -51,10 +51,6 @@
return TryMergeIntoShifterOperand(use, bitfield_op, true);
}
- bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
- HBinaryOperation* input_binop,
- HInstruction* input_other);
-
// HInstruction visitors, sorted alphabetically.
void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
void VisitArraySet(HArraySet* instruction) OVERRIDE;
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
new file mode 100644
index 0000000..45d196f
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_shared.h"
+
+namespace art {
+
+namespace {
+
+bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
+ HBinaryOperation* input_binop,
+ HInstruction* input_other) {
+ DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
+ DCHECK(input_binop->IsAdd() || input_binop->IsSub());
+ DCHECK_NE(input_binop, input_other);
+ if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
+ return false;
+ }
+
+ // Try to interpret patterns like
+ // a * (b <+/-> 1)
+ // as
+ // (a * b) <+/-> a
+ HInstruction* input_a = input_other;
+ HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize.
+ HInstruction::InstructionKind op_kind;
+
+ if (input_binop->IsAdd()) {
+ if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
+ // Interpret
+ // a * (b + 1)
+ // as
+ // (a * b) + a
+ input_b = input_binop->GetLeastConstantLeft();
+ op_kind = HInstruction::kAdd;
+ }
+ } else {
+ DCHECK(input_binop->IsSub());
+ if (input_binop->GetRight()->IsConstant() &&
+ input_binop->GetRight()->AsConstant()->IsMinusOne()) {
+ // Interpret
+ // a * (b - (-1))
+ // as
+ // a + (a * b)
+ input_b = input_binop->GetLeft();
+ op_kind = HInstruction::kAdd;
+ } else if (input_binop->GetLeft()->IsConstant() &&
+ input_binop->GetLeft()->AsConstant()->IsOne()) {
+ // Interpret
+ // a * (1 - b)
+ // as
+ // a - (a * b)
+ input_b = input_binop->GetRight();
+ op_kind = HInstruction::kSub;
+ }
+ }
+
+ if (input_b == nullptr) {
+ // We did not find a pattern we can optimize.
+ return false;
+ }
+
+ ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
+ HMultiplyAccumulate* mulacc = new(arena) HMultiplyAccumulate(
+ mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());
+
+ mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
+ input_binop->GetBlock()->RemoveInstruction(input_binop);
+
+ return true;
+}
+
+} // namespace
+
+bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) {
+ Primitive::Type type = mul->GetType();
+ switch (isa) {
+ case kArm:
+ case kThumb2:
+ if (type != Primitive::kPrimInt) {
+ return false;
+ }
+ break;
+ case kArm64:
+ if (!Primitive::IsIntOrLongType(type)) {
+ return false;
+ }
+ break;
+ default:
+ return false;
+ }
+
+ HInstruction* use = mul->HasNonEnvironmentUses()
+ ? mul->GetUses().GetFirst()->GetUser()
+ : nullptr;
+
+ ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
+
+ if (mul->HasOnlyOneNonEnvironmentUse()) {
+ if (use->IsAdd() || use->IsSub()) {
+ // Replace code looking like
+ // MUL tmp, x, y
+ // SUB dst, acc, tmp
+ // with
+ // MULSUB dst, acc, x, y
+ // Note that we do not want to (unconditionally) perform the merge when the
+ // multiplication has multiple uses and it can be merged in all of them.
+ // Multiple uses could happen on the same control-flow path, and we would
+ // then increase the amount of work. In the future we could try to evaluate
+ // whether all uses are on different control-flow paths (using dominance and
+ // reverse-dominance information) and only perform the merge when they are.
+ HInstruction* accumulator = nullptr;
+ HBinaryOperation* binop = use->AsBinaryOperation();
+ HInstruction* binop_left = binop->GetLeft();
+ HInstruction* binop_right = binop->GetRight();
+ // Be careful after GVN. This should not happen since the `HMul` has only
+ // one use.
+ DCHECK_NE(binop_left, binop_right);
+ if (binop_right == mul) {
+ accumulator = binop_left;
+ } else if (use->IsAdd()) {
+ DCHECK_EQ(binop_left, mul);
+ accumulator = binop_right;
+ }
+
+ if (accumulator != nullptr) {
+ HMultiplyAccumulate* mulacc =
+ new (arena) HMultiplyAccumulate(type,
+ binop->GetKind(),
+ accumulator,
+ mul->GetLeft(),
+ mul->GetRight());
+
+ binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+ DCHECK(!mul->HasUses());
+ mul->GetBlock()->RemoveInstruction(mul);
+ return true;
+ }
+ } else if (use->IsNeg() && isa != kArm) {
+ HMultiplyAccumulate* mulacc =
+ new (arena) HMultiplyAccumulate(type,
+ HInstruction::kSub,
+ mul->GetBlock()->GetGraph()->GetConstant(type, 0),
+ mul->GetLeft(),
+ mul->GetRight());
+
+ use->GetBlock()->ReplaceAndRemoveInstructionWith(use, mulacc);
+ DCHECK(!mul->HasUses());
+ mul->GetBlock()->RemoveInstruction(mul);
+ return true;
+ }
+ }
+
+ // Use multiply accumulate instruction for a few simple patterns.
+ // We prefer not applying the following transformations if the left and
+ // right inputs perform the same operation.
+ // We rely on GVN having squashed the inputs if appropriate. However the
+ // results are still correct even if that did not happen.
+ if (mul->GetLeft() == mul->GetRight()) {
+ return false;
+ }
+
+ HInstruction* left = mul->GetLeft();
+ HInstruction* right = mul->GetRight();
+ if ((right->IsAdd() || right->IsSub()) &&
+ TrySimpleMultiplyAccumulatePatterns(mul, right->AsBinaryOperation(), left)) {
+ return true;
+ }
+ if ((left->IsAdd() || left->IsSub()) &&
+ TrySimpleMultiplyAccumulatePatterns(mul, left->AsBinaryOperation(), right)) {
+ return true;
+ }
+ return false;
+}
+
+} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
new file mode 100644
index 0000000..9832ecc
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
+
+#include "nodes.h"
+
+namespace art {
+
+bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa);
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 399afab..2b7d2de 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1247,6 +1247,16 @@
M(UShr, BinaryOperation) \
M(Xor, BinaryOperation) \
+/*
+ * Instructions, shared across several (not all) architectures.
+ */
+#if !defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_ENABLE_CODEGEN_arm64)
+#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \
+ M(MultiplyAccumulate, Instruction)
+#endif
+
#ifndef ART_ENABLE_CODEGEN_arm
#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)
#else
@@ -1259,8 +1269,7 @@
#else
#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \
M(Arm64DataProcWithShifterOp, Instruction) \
- M(Arm64IntermediateAddress, Instruction) \
- M(Arm64MultiplyAccumulate, Instruction)
+ M(Arm64IntermediateAddress, Instruction)
#endif
#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)
@@ -1281,6 +1290,7 @@
#define FOR_EACH_CONCRETE_INSTRUCTION(M) \
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \
+ FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \
FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \
FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \
FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \
@@ -6060,6 +6070,9 @@
} // namespace art
+#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
+#include "nodes_shared.h"
+#endif
#ifdef ART_ENABLE_CODEGEN_arm
#include "nodes_arm.h"
#endif
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 445cdab..173852a 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -118,40 +118,6 @@
DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
};
-class HArm64MultiplyAccumulate : public HExpression<3> {
- public:
- HArm64MultiplyAccumulate(Primitive::Type type,
- InstructionKind op,
- HInstruction* accumulator,
- HInstruction* mul_left,
- HInstruction* mul_right,
- uint32_t dex_pc = kNoDexPc)
- : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
- SetRawInputAt(kInputAccumulatorIndex, accumulator);
- SetRawInputAt(kInputMulLeftIndex, mul_left);
- SetRawInputAt(kInputMulRightIndex, mul_right);
- }
-
- static constexpr int kInputAccumulatorIndex = 0;
- static constexpr int kInputMulLeftIndex = 1;
- static constexpr int kInputMulRightIndex = 2;
-
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
- return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_;
- }
-
- InstructionKind GetOpKind() const { return op_kind_; }
-
- DECLARE_INSTRUCTION(Arm64MultiplyAccumulate);
-
- private:
- // Indicates if this is a MADD or MSUB.
- InstructionKind op_kind_;
-
- DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate);
-};
-
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
new file mode 100644
index 0000000..b04b622
--- /dev/null
+++ b/compiler/optimizing/nodes_shared.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
+#define ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
+
+namespace art {
+
+class HMultiplyAccumulate : public HExpression<3> {
+ public:
+ HMultiplyAccumulate(Primitive::Type type,
+ InstructionKind op,
+ HInstruction* accumulator,
+ HInstruction* mul_left,
+ HInstruction* mul_right,
+ uint32_t dex_pc = kNoDexPc)
+ : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
+ SetRawInputAt(kInputAccumulatorIndex, accumulator);
+ SetRawInputAt(kInputMulLeftIndex, mul_left);
+ SetRawInputAt(kInputMulRightIndex, mul_right);
+ }
+
+ static constexpr int kInputAccumulatorIndex = 0;
+ static constexpr int kInputMulLeftIndex = 1;
+ static constexpr int kInputMulRightIndex = 2;
+
+ bool CanBeMoved() const OVERRIDE { return true; }
+ bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+ return op_kind_ == other->AsMultiplyAccumulate()->op_kind_;
+ }
+
+ InstructionKind GetOpKind() const { return op_kind_; }
+
+ DECLARE_INSTRUCTION(MultiplyAccumulate);
+
+ private:
+ // Indicates if this is a MADD or MSUB.
+ const InstructionKind op_kind_;
+
+ DISALLOW_COPY_AND_ASSIGN(HMultiplyAccumulate);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b1891c9..5a9f258 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -60,6 +60,7 @@
#include "induction_var_analysis.h"
#include "inliner.h"
#include "instruction_simplifier.h"
+#include "instruction_simplifier_arm.h"
#include "intrinsics.h"
#include "jit/debugger_interface.h"
#include "jit/jit_code_cache.h"
@@ -438,7 +439,10 @@
case kThumb2:
case kArm: {
arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
+ arm::InstructionSimplifierArm* simplifier =
+ new (arena) arm::InstructionSimplifierArm(graph, stats);
HOptimization* arm_optimizations[] = {
+ simplifier,
fixups
};
RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);