Opt compiler: Speedup div/rem by constants on arm32 and arm64.
This patch also includes:
1. Add java test for div/rem negative constants.
2. Fix a thumb2 encoding issue where the last operand is
"reg, shift #amount" in some instructions.
3. Support a simple filter in arm32 assembler test to filter out
unsupported cases, such as "smull r0, r0, r1, r2".
4. Add smull arm32 assembler test.
5. Add smull/umull thumb2 test.
6. Add test for the thumb2 encoding issue which is fixed in this
patch.
Change-Id: I1601bc9c38f70f11909f2816fe3ec105a158951e
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index f56e446..e755dfe 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -17,6 +17,7 @@
#include "code_generator_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_utils.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
@@ -2185,11 +2186,134 @@
}
}
+void InstructionCodeGeneratorARM::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ Register out = locations->Out().AsRegister<Register>();
+ Register dividend = locations->InAt(0).AsRegister<Register>();
+ int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+ DCHECK(imm == 1 || imm == -1);
+
+ if (instruction->IsRem()) {
+ __ LoadImmediate(out, 0);
+ } else {
+ if (imm == 1) {
+ __ Mov(out, dividend);
+ } else {
+ __ rsb(out, dividend, ShifterOperand(0));
+ }
+ }
+}
+
+void InstructionCodeGeneratorARM::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ Register out = locations->Out().AsRegister<Register>();
+ Register dividend = locations->InAt(0).AsRegister<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+ int32_t abs_imm = std::abs(imm);
+ DCHECK(IsPowerOfTwo(abs_imm));
+ int ctz_imm = CTZ(abs_imm);
+
+ if (ctz_imm == 1) {
+ __ Lsr(temp, dividend, 32 - ctz_imm);
+ } else {
+ __ Asr(temp, dividend, 31);
+ __ Lsr(temp, temp, 32 - ctz_imm);
+ }
+ __ add(out, temp, ShifterOperand(dividend));
+
+ if (instruction->IsDiv()) {
+ __ Asr(out, out, ctz_imm);
+ if (imm < 0) {
+ __ rsb(out, out, ShifterOperand(0));
+ }
+ } else {
+ __ ubfx(out, out, 0, ctz_imm);
+ __ sub(out, out, ShifterOperand(temp));
+ }
+}
+
+void InstructionCodeGeneratorARM::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ Register out = locations->Out().AsRegister<Register>();
+ Register dividend = locations->InAt(0).AsRegister<Register>();
+ Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+ Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ int64_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+
+ int64_t magic;
+ int shift;
+ CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
+
+ __ LoadImmediate(temp1, magic);
+ __ smull(temp2, temp1, dividend, temp1);
+
+ if (imm > 0 && magic < 0) {
+ __ add(temp1, temp1, ShifterOperand(dividend));
+ } else if (imm < 0 && magic > 0) {
+ __ sub(temp1, temp1, ShifterOperand(dividend));
+ }
+
+ if (shift != 0) {
+ __ Asr(temp1, temp1, shift);
+ }
+
+ if (instruction->IsDiv()) {
+ __ sub(out, temp1, ShifterOperand(temp1, ASR, 31));
+ } else {
+ __ sub(temp1, temp1, ShifterOperand(temp1, ASR, 31));
+ // TODO: Strength reduction for mls.
+ __ LoadImmediate(temp2, imm);
+ __ mls(out, temp1, temp2, dividend);
+ }
+}
+
+void InstructionCodeGeneratorARM::GenerateDivRemConstantIntegral(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+ if (imm == 0) {
+ // Do not generate anything. DivZeroCheck would prevent any code to be executed.
+ } else if (imm == 1 || imm == -1) {
+ DivRemOneOrMinusOne(instruction);
+ } else if (IsPowerOfTwo(std::abs(imm))) {
+ DivRemByPowerOfTwo(instruction);
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ GenerateDivRemWithAnyConstant(instruction);
+ }
+}
+
void LocationsBuilderARM::VisitDiv(HDiv* div) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
if (div->GetResultType() == Primitive::kPrimLong) {
// pLdiv runtime call.
call_kind = LocationSummary::kCall;
+ } else if (div->GetResultType() == Primitive::kPrimInt && div->InputAt(1)->IsConstant()) {
+ // sdiv will be replaced by other instruction sequence.
} else if (div->GetResultType() == Primitive::kPrimInt &&
!codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
// pIdivmod runtime call.
@@ -2200,7 +2324,20 @@
switch (div->GetResultType()) {
case Primitive::kPrimInt: {
- if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ if (div->InputAt(1)->IsConstant()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue());
+ if (abs_imm <= 1) {
+ // No temp register required.
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ if (!IsPowerOfTwo(abs_imm)) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ }
+ } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
@@ -2244,7 +2381,9 @@
switch (div->GetResultType()) {
case Primitive::kPrimInt: {
- if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ if (second.IsConstant()) {
+ GenerateDivRemConstantIntegral(div);
+ } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
__ sdiv(out.AsRegister<Register>(),
first.AsRegister<Register>(),
second.AsRegister<Register>());
@@ -2296,8 +2435,11 @@
// Most remainders are implemented in the runtime.
LocationSummary::CallKind call_kind = LocationSummary::kCall;
- if (rem->GetResultType() == Primitive::kPrimInt &&
- codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ if (rem->GetResultType() == Primitive::kPrimInt && rem->InputAt(1)->IsConstant()) {
+ // sdiv will be replaced by other instruction sequence.
+ call_kind = LocationSummary::kNoCall;
+ } else if ((rem->GetResultType() == Primitive::kPrimInt)
+ && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
// Have hardware divide instruction for int, do it with three instructions.
call_kind = LocationSummary::kNoCall;
}
@@ -2306,7 +2448,20 @@
switch (type) {
case Primitive::kPrimInt: {
- if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ if (rem->InputAt(1)->IsConstant()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue());
+ if (abs_imm <= 1) {
+ // No temp register required.
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ if (!IsPowerOfTwo(abs_imm)) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ }
+ } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
@@ -2363,7 +2518,9 @@
Primitive::Type type = rem->GetResultType();
switch (type) {
case Primitive::kPrimInt: {
- if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ if (second.IsConstant()) {
+ GenerateDivRemConstantIntegral(rem);
+ } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
Register reg1 = first.AsRegister<Register>();
Register reg2 = second.AsRegister<Register>();
Register temp = locations->GetTemp(0).AsRegister<Register>();