Opt compiler: Speedup div/rem by constants on arm32 and arm64.
This patch also includes:
1. Add java test for div/rem negative constants.
2. Fix a thumb2 encoding issue where the last operand is
"reg, shift #amount" in some instructions.
3. Support a simple filter in arm32 assembler test to filter out
unsupported cases, such as "smull r0, r0, r1, r2".
4. Add smull arm32 assembler test.
5. Add smull/umull thumb2 test.
6. Add test for the thumb2 encoding issue which is fixed in this
patch.
Change-Id: I1601bc9c38f70f11909f2816fe3ec105a158951e
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index f56e446..e755dfe 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -17,6 +17,7 @@
#include "code_generator_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_utils.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
@@ -2185,11 +2186,134 @@
}
}
+void InstructionCodeGeneratorARM::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ Register out = locations->Out().AsRegister<Register>();
+ Register dividend = locations->InAt(0).AsRegister<Register>();
+ int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+ DCHECK(imm == 1 || imm == -1);
+
+ if (instruction->IsRem()) {
+ __ LoadImmediate(out, 0);
+ } else {
+ if (imm == 1) {
+ __ Mov(out, dividend);
+ } else {
+ __ rsb(out, dividend, ShifterOperand(0));
+ }
+ }
+}
+
+void InstructionCodeGeneratorARM::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ Register out = locations->Out().AsRegister<Register>();
+ Register dividend = locations->InAt(0).AsRegister<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+ int32_t abs_imm = std::abs(imm);
+ DCHECK(IsPowerOfTwo(abs_imm));
+ int ctz_imm = CTZ(abs_imm);
+
+ if (ctz_imm == 1) {
+ __ Lsr(temp, dividend, 32 - ctz_imm);
+ } else {
+ __ Asr(temp, dividend, 31);
+ __ Lsr(temp, temp, 32 - ctz_imm);
+ }
+ __ add(out, temp, ShifterOperand(dividend));
+
+ if (instruction->IsDiv()) {
+ __ Asr(out, out, ctz_imm);
+ if (imm < 0) {
+ __ rsb(out, out, ShifterOperand(0));
+ }
+ } else {
+ __ ubfx(out, out, 0, ctz_imm);
+ __ sub(out, out, ShifterOperand(temp));
+ }
+}
+
+void InstructionCodeGeneratorARM::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ Register out = locations->Out().AsRegister<Register>();
+ Register dividend = locations->InAt(0).AsRegister<Register>();
+ Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+ Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ int64_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+
+ int64_t magic;
+ int shift;
+ CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
+
+ __ LoadImmediate(temp1, magic);
+ __ smull(temp2, temp1, dividend, temp1);
+
+ if (imm > 0 && magic < 0) {
+ __ add(temp1, temp1, ShifterOperand(dividend));
+ } else if (imm < 0 && magic > 0) {
+ __ sub(temp1, temp1, ShifterOperand(dividend));
+ }
+
+ if (shift != 0) {
+ __ Asr(temp1, temp1, shift);
+ }
+
+ if (instruction->IsDiv()) {
+ __ sub(out, temp1, ShifterOperand(temp1, ASR, 31));
+ } else {
+ __ sub(temp1, temp1, ShifterOperand(temp1, ASR, 31));
+ // TODO: Strength reduction for mls.
+ __ LoadImmediate(temp2, imm);
+ __ mls(out, temp1, temp2, dividend);
+ }
+}
+
+void InstructionCodeGeneratorARM::GenerateDivRemConstantIntegral(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+ if (imm == 0) {
+ // Do not generate anything. DivZeroCheck would prevent any code to be executed.
+ } else if (imm == 1 || imm == -1) {
+ DivRemOneOrMinusOne(instruction);
+ } else if (IsPowerOfTwo(std::abs(imm))) {
+ DivRemByPowerOfTwo(instruction);
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ GenerateDivRemWithAnyConstant(instruction);
+ }
+}
+
void LocationsBuilderARM::VisitDiv(HDiv* div) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
if (div->GetResultType() == Primitive::kPrimLong) {
// pLdiv runtime call.
call_kind = LocationSummary::kCall;
+ } else if (div->GetResultType() == Primitive::kPrimInt && div->InputAt(1)->IsConstant()) {
+ // sdiv will be replaced by other instruction sequence.
} else if (div->GetResultType() == Primitive::kPrimInt &&
!codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
// pIdivmod runtime call.
@@ -2200,7 +2324,20 @@
switch (div->GetResultType()) {
case Primitive::kPrimInt: {
- if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ if (div->InputAt(1)->IsConstant()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue());
+ if (abs_imm <= 1) {
+ // No temp register required.
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ if (!IsPowerOfTwo(abs_imm)) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ }
+ } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
@@ -2244,7 +2381,9 @@
switch (div->GetResultType()) {
case Primitive::kPrimInt: {
- if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ if (second.IsConstant()) {
+ GenerateDivRemConstantIntegral(div);
+ } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
__ sdiv(out.AsRegister<Register>(),
first.AsRegister<Register>(),
second.AsRegister<Register>());
@@ -2296,8 +2435,11 @@
// Most remainders are implemented in the runtime.
LocationSummary::CallKind call_kind = LocationSummary::kCall;
- if (rem->GetResultType() == Primitive::kPrimInt &&
- codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ if (rem->GetResultType() == Primitive::kPrimInt && rem->InputAt(1)->IsConstant()) {
+ // sdiv will be replaced by other instruction sequence.
+ call_kind = LocationSummary::kNoCall;
+ } else if ((rem->GetResultType() == Primitive::kPrimInt)
+ && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
// Have hardware divide instruction for int, do it with three instructions.
call_kind = LocationSummary::kNoCall;
}
@@ -2306,7 +2448,20 @@
switch (type) {
case Primitive::kPrimInt: {
- if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ if (rem->InputAt(1)->IsConstant()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue());
+ if (abs_imm <= 1) {
+ // No temp register required.
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ if (!IsPowerOfTwo(abs_imm)) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ }
+ } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
@@ -2363,7 +2518,9 @@
Primitive::Type type = rem->GetResultType();
switch (type) {
case Primitive::kPrimInt: {
- if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ if (second.IsConstant()) {
+ GenerateDivRemConstantIntegral(rem);
+ } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
Register reg1 = first.AsRegister<Register>();
Register reg2 = second.AsRegister<Register>();
Register temp = locations->GetTemp(0).AsRegister<Register>();
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 1a498e1..2edbcf8 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -189,6 +189,10 @@
Label* true_target,
Label* false_target,
Label* always_true_target);
+ void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+ void DivRemByPowerOfTwo(HBinaryOperation* instruction);
+ void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
+ void GenerateDivRemConstantIntegral(HBinaryOperation* instruction);
ArmAssembler* const assembler_;
CodeGeneratorARM* const codegen_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index b1cb880..00540e2 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -17,6 +17,7 @@
#include "code_generator_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
+#include "code_generator_utils.h"
#include "common_arm64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
@@ -1603,6 +1604,152 @@
#undef DEFINE_CONDITION_VISITORS
#undef FOR_EACH_CONDITION_INSTRUCTION
+void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ Register out = OutputRegister(instruction);
+ Register dividend = InputRegisterAt(instruction, 0);
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ DCHECK(imm == 1 || imm == -1);
+
+ if (instruction->IsRem()) {
+ __ Mov(out, 0);
+ } else {
+ if (imm == 1) {
+ __ Mov(out, dividend);
+ } else {
+ __ Neg(out, dividend);
+ }
+ }
+}
+
+void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ Register out = OutputRegister(instruction);
+ Register dividend = InputRegisterAt(instruction, 0);
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ int64_t abs_imm = std::abs(imm);
+ DCHECK(IsPowerOfTwo(abs_imm));
+ int ctz_imm = CTZ(abs_imm);
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireSameSizeAs(out);
+
+ if (instruction->IsDiv()) {
+ __ Add(temp, dividend, abs_imm - 1);
+ __ Cmp(dividend, 0);
+ __ Csel(out, temp, dividend, lt);
+ if (imm > 0) {
+ __ Asr(out, out, ctz_imm);
+ } else {
+ __ Neg(out, Operand(out, ASR, ctz_imm));
+ }
+ } else {
+ int bits = instruction->GetResultType() == Primitive::kPrimInt ? 32 : 64;
+ __ Asr(temp, dividend, bits - 1);
+ __ Lsr(temp, temp, bits - ctz_imm);
+ __ Add(out, dividend, temp);
+ __ And(out, out, abs_imm - 1);
+ __ Sub(out, out, temp);
+ }
+}
+
+void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ Register out = OutputRegister(instruction);
+ Register dividend = InputRegisterAt(instruction, 0);
+ int64_t imm = Int64FromConstant(second.GetConstant());
+
+ Primitive::Type type = instruction->GetResultType();
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+ int64_t magic;
+ int shift;
+ CalculateMagicAndShiftForDivRem(imm, type == Primitive::kPrimLong /* is_long */, &magic, &shift);
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireSameSizeAs(out);
+
+ // temp = get_high(dividend * magic)
+ __ Mov(temp, magic);
+ if (type == Primitive::kPrimLong) {
+ __ Smulh(temp, dividend, temp);
+ } else {
+ __ Smull(temp.X(), dividend, temp);
+ __ Lsr(temp.X(), temp.X(), 32);
+ }
+
+ if (imm > 0 && magic < 0) {
+ __ Add(temp, temp, dividend);
+ } else if (imm < 0 && magic > 0) {
+ __ Sub(temp, temp, dividend);
+ }
+
+ if (shift != 0) {
+ __ Asr(temp, temp, shift);
+ }
+
+ if (instruction->IsDiv()) {
+ __ Sub(out, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
+ } else {
+ __ Sub(temp, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
+ // TODO: Strength reduction for msub.
+ Register temp_imm = temps.AcquireSameSizeAs(out);
+ __ Mov(temp_imm, imm);
+ __ Msub(out, temp, temp_imm, dividend);
+ }
+}
+
+void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ Primitive::Type type = instruction->GetResultType();
+ DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Register out = OutputRegister(instruction);
+ Location second = locations->InAt(1);
+
+ if (second.IsConstant()) {
+ int64_t imm = Int64FromConstant(second.GetConstant());
+
+ if (imm == 0) {
+ // Do not generate anything. DivZeroCheck would prevent any code to be executed.
+ } else if (imm == 1 || imm == -1) {
+ DivRemOneOrMinusOne(instruction);
+ } else if (IsPowerOfTwo(std::abs(imm))) {
+ DivRemByPowerOfTwo(instruction);
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ GenerateDivRemWithAnyConstant(instruction);
+ }
+ } else {
+ Register dividend = InputRegisterAt(instruction, 0);
+ Register divisor = InputRegisterAt(instruction, 1);
+ if (instruction->IsDiv()) {
+ __ Sdiv(out, dividend, divisor);
+ } else {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireSameSizeAs(out);
+ __ Sdiv(temp, dividend, divisor);
+ __ Msub(out, temp, divisor, dividend);
+ }
+ }
+}
+
void LocationsBuilderARM64::VisitDiv(HDiv* div) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
@@ -1610,7 +1757,7 @@
case Primitive::kPrimInt:
case Primitive::kPrimLong:
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
@@ -1631,7 +1778,7 @@
switch (type) {
case Primitive::kPrimInt:
case Primitive::kPrimLong:
- __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1));
+ GenerateDivRemIntegral(div);
break;
case Primitive::kPrimFloat:
@@ -2454,7 +2601,7 @@
case Primitive::kPrimInt:
case Primitive::kPrimLong:
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
@@ -2479,14 +2626,7 @@
switch (type) {
case Primitive::kPrimInt:
case Primitive::kPrimLong: {
- UseScratchRegisterScope temps(GetVIXLAssembler());
- Register dividend = InputRegisterAt(rem, 0);
- Register divisor = InputRegisterAt(rem, 1);
- Register output = OutputRegister(rem);
- Register temp = temps.AcquireSameSizeAs(output);
-
- __ Sdiv(temp, dividend, divisor);
- __ Msub(output, temp, divisor, dividend);
+ GenerateDivRemIntegral(rem);
break;
}
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 8aeea54..0dc0918 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -163,6 +163,11 @@
vixl::Label* true_target,
vixl::Label* false_target,
vixl::Label* always_true_target);
+ void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+ void DivRemByPowerOfTwo(HBinaryOperation* instruction);
+ void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
+ void GenerateDivRemIntegral(HBinaryOperation* instruction);
+
Arm64Assembler* const assembler_;
CodeGeneratorARM64* const codegen_;
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 313f365..dee8287 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -398,6 +398,8 @@
Condition cond = AL) = 0;
virtual void mls(Register rd, Register rn, Register rm, Register ra,
Condition cond = AL) = 0;
+ virtual void smull(Register rd_lo, Register rd_hi, Register rn, Register rm,
+ Condition cond = AL) = 0;
virtual void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
Condition cond = AL) = 0;
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 9579691..6e165fc 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -200,6 +200,13 @@
}
+void Arm32Assembler::smull(Register rd_lo, Register rd_hi, Register rn,
+ Register rm, Condition cond) {
+ // Assembler registers rd_lo, rd_hi, rn, rm are encoded as rd, rn, rm, rs.
+ EmitMulOp(cond, B23 | B22, rd_lo, rd_hi, rn, rm);
+}
+
+
void Arm32Assembler::umull(Register rd_lo, Register rd_hi, Register rn,
Register rm, Condition cond) {
// Assembler registers rd_lo, rd_hi, rn, rm are encoded as rd, rn, rm, rs.
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index b922d66..55ec7b4 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -90,6 +90,8 @@
Condition cond = AL) OVERRIDE;
void mls(Register rd, Register rn, Register rm, Register ra,
Condition cond = AL) OVERRIDE;
+ void smull(Register rd_lo, Register rd_hi, Register rn, Register rm,
+ Condition cond = AL) OVERRIDE;
void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
Condition cond = AL) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc
index 4a0ae0b..efd517b 100644
--- a/compiler/utils/arm/assembler_arm32_test.cc
+++ b/compiler/utils/arm/assembler_arm32_test.cc
@@ -293,12 +293,29 @@
f();
}
+ // NOTE: Only support simple test like "aaa=bbb"
+ bool EvalFilterString(std::string filter) {
+ if (filter.compare("") == 0) {
+ return false;
+ }
+
+ size_t equal_sign_index = filter.find('=');
+ if (equal_sign_index == std::string::npos) {
+ EXPECT_TRUE(false) << "Unsupported filter string.";
+ }
+
+ std::string lhs = filter.substr(0, equal_sign_index);
+ std::string rhs = filter.substr(equal_sign_index + 1, std::string::npos);
+ return lhs.compare(rhs) == 0;
+ }
+
void TemplateHelper(std::function<void(arm::Register)> f, int depth ATTRIBUTE_UNUSED,
- bool without_pc,
- std::string fmt, std::ostringstream& oss) {
+ bool without_pc, std::string fmt, std::string filter,
+ std::ostringstream& oss) {
std::vector<arm::Register*> registers = without_pc ? GetRegistersWithoutPC() : GetRegisters();
for (auto reg : registers) {
std::string after_reg = fmt;
+ std::string after_reg_filter = filter;
std::string reg_string = GetRegName<RegisterView::kUsePrimaryName>(*reg);
size_t reg_index;
@@ -308,14 +325,23 @@
after_reg.replace(reg_index, strlen(reg_token), reg_string);
}
+ while ((reg_index = after_reg_filter.find(reg_token)) != std::string::npos) {
+ after_reg_filter.replace(reg_index, strlen(reg_token), reg_string);
+ }
+ if (EvalFilterString(after_reg_filter)) {
+ continue;
+ }
+
ExecuteAndPrint([&] () { f(*reg); }, after_reg, oss);
}
}
void TemplateHelper(std::function<void(const arm::ShifterOperand&)> f, int depth ATTRIBUTE_UNUSED,
- bool without_pc ATTRIBUTE_UNUSED, std::string fmt, std::ostringstream& oss) {
+ bool without_pc ATTRIBUTE_UNUSED, std::string fmt, std::string filter,
+ std::ostringstream& oss) {
for (const arm::ShifterOperand& shift : GetShiftOperands()) {
std::string after_shift = fmt;
+ std::string after_shift_filter = filter;
std::string shift_string = GetShiftString(shift);
size_t shift_index;
@@ -323,30 +349,48 @@
after_shift.replace(shift_index, ConstexprStrLen(SHIFT_TOKEN), shift_string);
}
+ while ((shift_index = after_shift_filter.find(SHIFT_TOKEN)) != std::string::npos) {
+ after_shift_filter.replace(shift_index, ConstexprStrLen(SHIFT_TOKEN), shift_string);
+ }
+ if (EvalFilterString(after_shift_filter)) {
+ continue;
+ }
+
ExecuteAndPrint([&] () { f(shift); }, after_shift, oss);
}
}
void TemplateHelper(std::function<void(arm::Condition)> f, int depth ATTRIBUTE_UNUSED,
- bool without_pc ATTRIBUTE_UNUSED, std::string fmt, std::ostringstream& oss) {
+ bool without_pc ATTRIBUTE_UNUSED, std::string fmt, std::string filter,
+ std::ostringstream& oss) {
for (arm::Condition c : GetConditions()) {
std::string after_cond = fmt;
+ std::string after_cond_filter = filter;
size_t cond_index = after_cond.find(COND_TOKEN);
if (cond_index != std::string::npos) {
after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
}
+ cond_index = after_cond_filter.find(COND_TOKEN);
+ if (cond_index != std::string::npos) {
+ after_cond_filter.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+ }
+ if (EvalFilterString(after_cond_filter)) {
+ continue;
+ }
+
ExecuteAndPrint([&] () { f(c); }, after_cond, oss);
}
}
template <typename... Args>
void TemplateHelper(std::function<void(arm::Register, Args...)> f, int depth, bool without_pc,
- std::string fmt, std::ostringstream& oss) {
+ std::string fmt, std::string filter, std::ostringstream& oss) {
std::vector<arm::Register*> registers = without_pc ? GetRegistersWithoutPC() : GetRegisters();
for (auto reg : registers) {
std::string after_reg = fmt;
+ std::string after_reg_filter = filter;
std::string reg_string = GetRegName<RegisterView::kUsePrimaryName>(*reg);
size_t reg_index;
@@ -356,17 +400,26 @@
after_reg.replace(reg_index, strlen(reg_token), reg_string);
}
+ while ((reg_index = after_reg_filter.find(reg_token)) != std::string::npos) {
+ after_reg_filter.replace(reg_index, strlen(reg_token), reg_string);
+ }
+ if (EvalFilterString(after_reg_filter)) {
+ continue;
+ }
+
auto lambda = [&] (Args... args) { f(*reg, args...); }; // NOLINT [readability/braces] [4]
TemplateHelper(std::function<void(Args...)>(lambda), depth + 1, without_pc,
- after_reg, oss);
+ after_reg, after_reg_filter, oss);
}
}
template <typename... Args>
void TemplateHelper(std::function<void(const arm::ShifterOperand&, Args...)> f, int depth,
- bool without_pc, std::string fmt, std::ostringstream& oss) {
+ bool without_pc, std::string fmt, std::string filter,
+ std::ostringstream& oss) {
for (const arm::ShifterOperand& shift : GetShiftOperands()) {
std::string after_shift = fmt;
+ std::string after_shift_filter = filter;
std::string shift_string = GetShiftString(shift);
size_t shift_index;
@@ -374,26 +427,42 @@
after_shift.replace(shift_index, ConstexprStrLen(SHIFT_TOKEN), shift_string);
}
+ while ((shift_index = after_shift_filter.find(SHIFT_TOKEN)) != std::string::npos) {
+ after_shift_filter.replace(shift_index, ConstexprStrLen(SHIFT_TOKEN), shift_string);
+ }
+ if (EvalFilterString(after_shift_filter)) {
+ continue;
+ }
+
auto lambda = [&] (Args... args) { f(shift, args...); }; // NOLINT [readability/braces] [4]
TemplateHelper(std::function<void(Args...)>(lambda), depth, without_pc,
- after_shift, oss);
+ after_shift, after_shift_filter, oss);
}
}
template <typename... Args>
void TemplateHelper(std::function<void(arm::Condition, Args...)> f, int depth, bool without_pc,
- std::string fmt, std::ostringstream& oss) {
+ std::string fmt, std::string filter, std::ostringstream& oss) {
for (arm::Condition c : GetConditions()) {
std::string after_cond = fmt;
+ std::string after_cond_filter = filter;
size_t cond_index = after_cond.find(COND_TOKEN);
if (cond_index != std::string::npos) {
after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
}
+ cond_index = after_cond_filter.find(COND_TOKEN);
+ if (cond_index != std::string::npos) {
+ after_cond_filter.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+ }
+ if (EvalFilterString(after_cond_filter)) {
+ continue;
+ }
+
auto lambda = [&] (Args... args) { f(c, args...); }; // NOLINT [readability/braces] [4]
TemplateHelper(std::function<void(Args...)>(lambda), depth, without_pc,
- after_cond, oss);
+ after_cond, after_cond_filter, oss);
}
}
@@ -421,13 +490,13 @@
template <typename... Args>
void GenericTemplateHelper(std::function<void(Args...)> f, bool without_pc,
- std::string fmt, std::string test_name) {
+ std::string fmt, std::string test_name, std::string filter) {
first_ = false;
WarnOnCombinations(CountHelper<Args...>(without_pc));
std::ostringstream oss;
- TemplateHelper(f, 0, without_pc, fmt, oss);
+ TemplateHelper(f, 0, without_pc, fmt, filter, oss);
oss << "\n"; // Trailing newline.
@@ -436,26 +505,26 @@
template <typename... Args>
void T2Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
- std::string test_name) {
- GenericTemplateHelper(GetBoundFunction2(f), without_pc, fmt, test_name);
+ std::string test_name, std::string filter = "") {
+ GenericTemplateHelper(GetBoundFunction2(f), without_pc, fmt, test_name, filter);
}
template <typename... Args>
void T3Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
- std::string test_name) {
- GenericTemplateHelper(GetBoundFunction3(f), without_pc, fmt, test_name);
+ std::string test_name, std::string filter = "") {
+ GenericTemplateHelper(GetBoundFunction3(f), without_pc, fmt, test_name, filter);
}
template <typename... Args>
void T4Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
- std::string test_name) {
- GenericTemplateHelper(GetBoundFunction4(f), without_pc, fmt, test_name);
+ std::string test_name, std::string filter = "") {
+ GenericTemplateHelper(GetBoundFunction4(f), without_pc, fmt, test_name, filter);
}
template <typename... Args>
void T5Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
- std::string test_name) {
- GenericTemplateHelper(GetBoundFunction5(f), without_pc, fmt, test_name);
+ std::string test_name, std::string filter = "") {
+ GenericTemplateHelper(GetBoundFunction5(f), without_pc, fmt, test_name, filter);
}
private:
@@ -565,15 +634,18 @@
}
TEST_F(AssemblerArm32Test, Mla) {
- T5Helper(&arm::Arm32Assembler::mla, true, "mla{cond} {reg1}, {reg2}, {reg3}, {reg4}", "mul");
+ T5Helper(&arm::Arm32Assembler::mla, true, "mla{cond} {reg1}, {reg2}, {reg3}, {reg4}", "mla");
}
-/* TODO: Needs support to filter out register combinations, as rdhi must not be equal to rdlo.
TEST_F(AssemblerArm32Test, Umull) {
T5Helper(&arm::Arm32Assembler::umull, true, "umull{cond} {reg1}, {reg2}, {reg3}, {reg4}",
- "umull");
+ "umull", "{reg1}={reg2}"); // Skip the cases where reg1 == reg2.
}
-*/
+
+TEST_F(AssemblerArm32Test, Smull) {
+ T5Helper(&arm::Arm32Assembler::smull, true, "smull{cond} {reg1}, {reg2}, {reg3}, {reg4}",
+ "smull", "{reg1}={reg2}"); // Skip the cases where reg1 == reg2.
+}
TEST_F(AssemblerArm32Test, Sdiv) {
T4Helper(&arm::Arm32Assembler::sdiv, true, "sdiv{cond} {reg1}, {reg2}, {reg3}", "sdiv");
@@ -655,9 +727,10 @@
T4Helper(&arm::Arm32Assembler::rsc, true, "rsc{cond} {reg1}, {reg2}, {shift}", "rsc");
}
-/* TODO: Needs support to filter out register combinations, as reg1 must not be equal to reg3.
+/* TODO: Need better filter support.
TEST_F(AssemblerArm32Test, Strex) {
- RRRCWithoutPCHelper(&arm::Arm32Assembler::strex, "strex{cond} {reg1}, {reg2}, [{reg3}]", "strex");
+ T4Helper(&arm::Arm32Assembler::strex, "strex{cond} {reg1}, {reg2}, [{reg3}]", "strex",
+ "{reg1}={reg2}||{reg1}={reg3}"); // Skip the cases where reg1 == reg2 || reg1 == reg3.
}
*/
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 3b42f63..e7cf26e 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -238,6 +238,24 @@
}
+void Thumb2Assembler::smull(Register rd_lo, Register rd_hi, Register rn,
+ Register rm, Condition cond) {
+ CheckCondition(cond);
+
+ uint32_t op1 = 0U /* 0b000; */;
+ uint32_t op2 = 0U /* 0b0000 */;
+ int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 | B23 |
+ op1 << 20 |
+ op2 << 4 |
+ static_cast<uint32_t>(rd_lo) << 12 |
+ static_cast<uint32_t>(rd_hi) << 8 |
+ static_cast<uint32_t>(rn) << 16 |
+ static_cast<uint32_t>(rm);
+
+ Emit32(encoding);
+}
+
+
void Thumb2Assembler::umull(Register rd_lo, Register rd_hi, Register rn,
Register rm, Condition cond) {
CheckCondition(cond);
@@ -740,13 +758,6 @@
return true;
}
- // Check for MOV with an ROR.
- if (opcode == MOV && so.IsRegister() && so.IsShift() && so.GetShift() == ROR) {
- if (so.GetImmediate() != 0) {
- return true;
- }
- }
-
bool rn_is_valid = true;
// Check for single operand instructions and ADD/SUB.
@@ -792,6 +803,19 @@
}
}
+ // Check for register shift operand.
+ if (so.IsRegister() && so.IsShift()) {
+ if (opcode != MOV) {
+ return true;
+ }
+ // Check for MOV with an ROR.
+ if (so.GetShift() == ROR) {
+ if (so.GetImmediate() != 0) {
+ return true;
+ }
+ }
+ }
+
// The instruction can be encoded in 16 bits.
return false;
}
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index e33c240..17eae8b 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -112,6 +112,8 @@
Condition cond = AL) OVERRIDE;
void mls(Register rd, Register rn, Register rm, Register ra,
Condition cond = AL) OVERRIDE;
+ void smull(Register rd_lo, Register rd_hi, Register rn, Register rm,
+ Condition cond = AL) OVERRIDE;
void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
Condition cond = AL) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 5f5561a..733441b 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -89,23 +89,24 @@
EXPECT_TRUE(CheckTools());
}
+#define __ GetAssembler()->
TEST_F(AssemblerThumb2Test, Sbfx) {
- GetAssembler()->sbfx(arm::R0, arm::R1, 0, 1);
- GetAssembler()->sbfx(arm::R0, arm::R1, 0, 8);
- GetAssembler()->sbfx(arm::R0, arm::R1, 0, 16);
- GetAssembler()->sbfx(arm::R0, arm::R1, 0, 32);
+ __ sbfx(arm::R0, arm::R1, 0, 1);
+ __ sbfx(arm::R0, arm::R1, 0, 8);
+ __ sbfx(arm::R0, arm::R1, 0, 16);
+ __ sbfx(arm::R0, arm::R1, 0, 32);
- GetAssembler()->sbfx(arm::R0, arm::R1, 8, 1);
- GetAssembler()->sbfx(arm::R0, arm::R1, 8, 8);
- GetAssembler()->sbfx(arm::R0, arm::R1, 8, 16);
- GetAssembler()->sbfx(arm::R0, arm::R1, 8, 24);
+ __ sbfx(arm::R0, arm::R1, 8, 1);
+ __ sbfx(arm::R0, arm::R1, 8, 8);
+ __ sbfx(arm::R0, arm::R1, 8, 16);
+ __ sbfx(arm::R0, arm::R1, 8, 24);
- GetAssembler()->sbfx(arm::R0, arm::R1, 16, 1);
- GetAssembler()->sbfx(arm::R0, arm::R1, 16, 8);
- GetAssembler()->sbfx(arm::R0, arm::R1, 16, 16);
+ __ sbfx(arm::R0, arm::R1, 16, 1);
+ __ sbfx(arm::R0, arm::R1, 16, 8);
+ __ sbfx(arm::R0, arm::R1, 16, 16);
- GetAssembler()->sbfx(arm::R0, arm::R1, 31, 1);
+ __ sbfx(arm::R0, arm::R1, 31, 1);
const char* expected =
"sbfx r0, r1, #0, #1\n"
@@ -127,21 +128,21 @@
}
TEST_F(AssemblerThumb2Test, Ubfx) {
- GetAssembler()->ubfx(arm::R0, arm::R1, 0, 1);
- GetAssembler()->ubfx(arm::R0, arm::R1, 0, 8);
- GetAssembler()->ubfx(arm::R0, arm::R1, 0, 16);
- GetAssembler()->ubfx(arm::R0, arm::R1, 0, 32);
+ __ ubfx(arm::R0, arm::R1, 0, 1);
+ __ ubfx(arm::R0, arm::R1, 0, 8);
+ __ ubfx(arm::R0, arm::R1, 0, 16);
+ __ ubfx(arm::R0, arm::R1, 0, 32);
- GetAssembler()->ubfx(arm::R0, arm::R1, 8, 1);
- GetAssembler()->ubfx(arm::R0, arm::R1, 8, 8);
- GetAssembler()->ubfx(arm::R0, arm::R1, 8, 16);
- GetAssembler()->ubfx(arm::R0, arm::R1, 8, 24);
+ __ ubfx(arm::R0, arm::R1, 8, 1);
+ __ ubfx(arm::R0, arm::R1, 8, 8);
+ __ ubfx(arm::R0, arm::R1, 8, 16);
+ __ ubfx(arm::R0, arm::R1, 8, 24);
- GetAssembler()->ubfx(arm::R0, arm::R1, 16, 1);
- GetAssembler()->ubfx(arm::R0, arm::R1, 16, 8);
- GetAssembler()->ubfx(arm::R0, arm::R1, 16, 16);
+ __ ubfx(arm::R0, arm::R1, 16, 1);
+ __ ubfx(arm::R0, arm::R1, 16, 8);
+ __ ubfx(arm::R0, arm::R1, 16, 16);
- GetAssembler()->ubfx(arm::R0, arm::R1, 31, 1);
+ __ ubfx(arm::R0, arm::R1, 31, 1);
const char* expected =
"ubfx r0, r1, #0, #1\n"
@@ -163,7 +164,7 @@
}
TEST_F(AssemblerThumb2Test, Vmstat) {
- GetAssembler()->vmstat();
+ __ vmstat();
const char* expected = "vmrs APSR_nzcv, FPSCR\n";
@@ -171,10 +172,10 @@
}
TEST_F(AssemblerThumb2Test, ldrexd) {
- GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R0);
- GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R1);
- GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R2);
- GetAssembler()->ldrexd(arm::R5, arm::R3, arm::R7);
+ __ ldrexd(arm::R0, arm::R1, arm::R0);
+ __ ldrexd(arm::R0, arm::R1, arm::R1);
+ __ ldrexd(arm::R0, arm::R1, arm::R2);
+ __ ldrexd(arm::R5, arm::R3, arm::R7);
const char* expected =
"ldrexd r0, r1, [r0]\n"
@@ -185,10 +186,10 @@
}
TEST_F(AssemblerThumb2Test, strexd) {
- GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R0);
- GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R1);
- GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R2);
- GetAssembler()->strexd(arm::R9, arm::R5, arm::R3, arm::R7);
+ __ strexd(arm::R9, arm::R0, arm::R1, arm::R0);
+ __ strexd(arm::R9, arm::R0, arm::R1, arm::R1);
+ __ strexd(arm::R9, arm::R0, arm::R1, arm::R2);
+ __ strexd(arm::R9, arm::R5, arm::R3, arm::R7);
const char* expected =
"strexd r9, r0, r1, [r0]\n"
@@ -199,9 +200,9 @@
}
TEST_F(AssemblerThumb2Test, LdrdStrd) {
- GetAssembler()->ldrd(arm::R0, arm::Address(arm::R2, 8));
- GetAssembler()->ldrd(arm::R0, arm::Address(arm::R12));
- GetAssembler()->strd(arm::R0, arm::Address(arm::R2, 8));
+ __ ldrd(arm::R0, arm::Address(arm::R2, 8));
+ __ ldrd(arm::R0, arm::Address(arm::R12));
+ __ strd(arm::R0, arm::Address(arm::R2, 8));
const char* expected =
"ldrd r0, r1, [r2, #8]\n"
@@ -211,7 +212,6 @@
}
TEST_F(AssemblerThumb2Test, eor) {
-#define __ GetAssembler()->
__ eor(arm::R1, arm::R1, arm::ShifterOperand(arm::R0));
__ eor(arm::R1, arm::R0, arm::ShifterOperand(arm::R1));
__ eor(arm::R1, arm::R8, arm::ShifterOperand(arm::R0));
@@ -230,23 +230,47 @@
TEST_F(AssemblerThumb2Test, sub) {
__ subs(arm::R1, arm::R0, arm::ShifterOperand(42));
__ sub(arm::R1, arm::R0, arm::ShifterOperand(42));
+ __ subs(arm::R1, arm::R0, arm::ShifterOperand(arm::R2, arm::ASR, 31));
+ __ sub(arm::R1, arm::R0, arm::ShifterOperand(arm::R2, arm::ASR, 31));
const char* expected =
"subs r1, r0, #42\n"
- "subw r1, r0, #42\n";
+ "subw r1, r0, #42\n"
+ "subs r1, r0, r2, asr #31\n"
+ "sub r1, r0, r2, asr #31\n";
DriverStr(expected, "sub");
}
TEST_F(AssemblerThumb2Test, add) {
__ adds(arm::R1, arm::R0, arm::ShifterOperand(42));
__ add(arm::R1, arm::R0, arm::ShifterOperand(42));
+ __ adds(arm::R1, arm::R0, arm::ShifterOperand(arm::R2, arm::ASR, 31));
+ __ add(arm::R1, arm::R0, arm::ShifterOperand(arm::R2, arm::ASR, 31));
const char* expected =
"adds r1, r0, #42\n"
- "addw r1, r0, #42\n";
+ "addw r1, r0, #42\n"
+ "adds r1, r0, r2, asr #31\n"
+ "add r1, r0, r2, asr #31\n";
DriverStr(expected, "add");
}
+TEST_F(AssemblerThumb2Test, umull) {
+ __ umull(arm::R0, arm::R1, arm::R2, arm::R3);
+
+ const char* expected =
+ "umull r0, r1, r2, r3\n";
+ DriverStr(expected, "umull");
+}
+
+TEST_F(AssemblerThumb2Test, smull) {
+ __ smull(arm::R0, arm::R1, arm::R2, arm::R3);
+
+ const char* expected =
+ "smull r0, r1, r2, r3\n";
+ DriverStr(expected, "smull");
+}
+
TEST_F(AssemblerThumb2Test, StoreWordToThumbOffset) {
arm::StoreOperandType type = arm::kStoreWord;
int32_t offset = 4092;