MIPS32: Implement signed 64-bit division and remainder by powers of 2
Test: ./testrunner.py --target --optimizing in QEMU
Change-Id: I662770eb31d557392588b637f6b03dd5aee96a83
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 2ed0ab7..815e32c 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -3767,77 +3767,251 @@
void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
- DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt32);
LocationSummary* locations = instruction->GetLocations();
Location second = locations->InAt(1);
- Register out = locations->Out().AsRegister<Register>();
- Register dividend = locations->InAt(0).AsRegister<Register>();
- int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+ int64_t imm = Int64FromConstant(second.GetConstant());
DCHECK(imm == 1 || imm == -1);
- if (instruction->IsRem()) {
- __ Move(out, ZERO);
+ if (instruction->GetResultType() == DataType::Type::kInt32) {
+ Register out = locations->Out().AsRegister<Register>();
+ Register dividend = locations->InAt(0).AsRegister<Register>();
+ if (instruction->IsRem()) {
+ __ Move(out, ZERO);
+ } else {
+ if (imm == -1) {
+ __ Subu(out, ZERO, dividend);
+ } else if (out != dividend) {
+ __ Move(out, dividend);
+ }
+ }
} else {
- if (imm == -1) {
- __ Subu(out, ZERO, dividend);
- } else if (out != dividend) {
- __ Move(out, dividend);
+ DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
+ Register out_high = locations->Out().AsRegisterPairHigh<Register>();
+ Register out_low = locations->Out().AsRegisterPairLow<Register>();
+ Register in_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register in_low = locations->InAt(0).AsRegisterPairLow<Register>();
+ if (instruction->IsRem()) {
+ __ Move(out_high, ZERO);
+ __ Move(out_low, ZERO);
+ } else {
+ if (imm == -1) {
+ __ Subu(out_low, ZERO, in_low);
+ __ Sltu(AT, ZERO, out_low);
+ __ Subu(out_high, ZERO, in_high);
+ __ Subu(out_high, out_high, AT);
+ } else {
+ __ Move(out_low, in_low);
+ __ Move(out_high, in_high);
+ }
void InstructionCodeGeneratorMIPS::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
- DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt32);
LocationSummary* locations = instruction->GetLocations();
Location second = locations->InAt(1);
+ const bool is_r2_or_newer = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+ const bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6();
- Register out = locations->Out().AsRegister<Register>();
- Register dividend = locations->InAt(0).AsRegister<Register>();
- int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
- uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
- int ctz_imm = CTZ(abs_imm);
+ if (instruction->GetResultType() == DataType::Type::kInt32) {
+ Register out = locations->Out().AsRegister<Register>();
+ Register dividend = locations->InAt(0).AsRegister<Register>();
+ int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+ uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
+ int ctz_imm = CTZ(abs_imm);
- if (instruction->IsDiv()) {
- if (ctz_imm == 1) {
- // Fast path for division by +/-2, which is very common.
- __ Srl(TMP, dividend, 31);
+ if (instruction->IsDiv()) {
+ if (ctz_imm == 1) {
+ // Fast path for division by +/-2, which is very common.
+ __ Srl(TMP, dividend, 31);
+ } else {
+ __ Sra(TMP, dividend, 31);
+ __ Srl(TMP, TMP, 32 - ctz_imm);
+ }
+ __ Addu(out, dividend, TMP);
+ __ Sra(out, out, ctz_imm);
+ if (imm < 0) {
+ __ Subu(out, ZERO, out);
+ }
} else {
- __ Sra(TMP, dividend, 31);
- __ Srl(TMP, TMP, 32 - ctz_imm);
- }
- __ Addu(out, dividend, TMP);
- __ Sra(out, out, ctz_imm);
- if (imm < 0) {
- __ Subu(out, ZERO, out);
+ if (ctz_imm == 1) {
+ // Fast path for modulo +/-2, which is very common.
+ __ Sra(TMP, dividend, 31);
+ __ Subu(out, dividend, TMP);
+ __ Andi(out, out, 1);
+ __ Addu(out, out, TMP);
+ } else {
+ __ Sra(TMP, dividend, 31);
+ __ Srl(TMP, TMP, 32 - ctz_imm);
+ __ Addu(out, dividend, TMP);
+ if (IsUint<16>(abs_imm - 1)) {
+ __ Andi(out, out, abs_imm - 1);
+ } else {
+ if (is_r2_or_newer) {
+ __ Ins(out, ZERO, ctz_imm, 32 - ctz_imm);
+ } else {
+ __ Sll(out, out, 32 - ctz_imm);
+ __ Srl(out, out, 32 - ctz_imm);
+ }
+ }
+ __ Subu(out, out, TMP);
+ }
} else {
- if (ctz_imm == 1) {
- // Fast path for modulo +/-2, which is very common.
- __ Sra(TMP, dividend, 31);
- __ Subu(out, dividend, TMP);
- __ Andi(out, out, 1);
- __ Addu(out, out, TMP);
- } else {
- __ Sra(TMP, dividend, 31);
- __ Srl(TMP, TMP, 32 - ctz_imm);
- __ Addu(out, dividend, TMP);
- if (IsUint<16>(abs_imm - 1)) {
- __ Andi(out, out, abs_imm - 1);
- } else {
- if (codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()) {
- __ Ins(out, ZERO, ctz_imm, 32 - ctz_imm);
+ DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
+ Register out_high = locations->Out().AsRegisterPairHigh<Register>();
+ Register out_low = locations->Out().AsRegisterPairLow<Register>();
+ Register in_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register in_low = locations->InAt(0).AsRegisterPairLow<Register>();
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
+ int ctz_imm = CTZ(abs_imm);
+ if (instruction->IsDiv()) {
+ if (ctz_imm < 32) {
+ if (ctz_imm == 1) {
+ __ Srl(AT, in_high, 31);
} else {
- __ Sll(out, out, 32 - ctz_imm);
- __ Srl(out, out, 32 - ctz_imm);
+ __ Sra(AT, in_high, 31);
+ __ Srl(AT, AT, 32 - ctz_imm);
+ __ Addu(AT, AT, in_low);
+ __ Sltu(TMP, AT, in_low);
+ __ Addu(out_high, in_high, TMP);
+ __ Srl(out_low, AT, ctz_imm);
+ if (is_r2_or_newer) {
+ __ Ins(out_low, out_high, 32 - ctz_imm, ctz_imm);
+ __ Sra(out_high, out_high, ctz_imm);
+ } else {
+ __ Sll(AT, out_high, 32 - ctz_imm);
+ __ Sra(out_high, out_high, ctz_imm);
+ __ Or(out_low, out_low, AT);
+ }
+ if (imm < 0) {
+ __ Subu(out_low, ZERO, out_low);
+ __ Sltu(AT, ZERO, out_low);
+ __ Subu(out_high, ZERO, out_high);
+ __ Subu(out_high, out_high, AT);
+ }
+ } else if (ctz_imm == 32) {
+ __ Sra(AT, in_high, 31);
+ __ Addu(AT, AT, in_low);
+ __ Sltu(AT, AT, in_low);
+ __ Addu(out_low, in_high, AT);
+ if (imm < 0) {
+ __ Srl(TMP, out_low, 31);
+ __ Subu(out_low, ZERO, out_low);
+ __ Sltu(AT, ZERO, out_low);
+ __ Subu(out_high, TMP, AT);
+ } else {
+ __ Sra(out_high, out_low, 31);
+ }
+ } else if (ctz_imm < 63) {
+ __ Sra(AT, in_high, 31);
+ __ Srl(TMP, AT, 64 - ctz_imm);
+ __ Addu(AT, AT, in_low);
+ __ Sltu(AT, AT, in_low);
+ __ Addu(out_low, in_high, AT);
+ __ Addu(out_low, out_low, TMP);
+ __ Sra(out_low, out_low, ctz_imm - 32);
+ if (imm < 0) {
+ __ Subu(out_low, ZERO, out_low);
+ }
+ __ Sra(out_high, out_low, 31);
+ } else {
+ DCHECK_LT(imm, 0);
+ if (is_r6) {
+ __ Aui(AT, in_high, 0x8000);
+ } else {
+ __ Lui(AT, 0x8000);
+ __ Xor(AT, AT, in_high);
+ }
+ __ Or(AT, AT, in_low);
+ __ Sltiu(out_low, AT, 1);
+ __ Move(out_high, ZERO);
- __ Subu(out, out, TMP);
+ } else {
+ if ((ctz_imm == 1) && !is_r6) {
+ __ Andi(AT, in_low, 1);
+ __ Sll(TMP, in_low, 31);
+ __ And(TMP, in_high, TMP);
+ __ Sra(out_high, TMP, 31);
+ __ Or(out_low, out_high, AT);
+ } else if (ctz_imm < 32) {
+ __ Sra(AT, in_high, 31);
+ if (ctz_imm <= 16) {
+ __ Andi(out_low, in_low, abs_imm - 1);
+ } else if (is_r2_or_newer) {
+ __ Ext(out_low, in_low, 0, ctz_imm);
+ } else {
+ __ Sll(out_low, in_low, 32 - ctz_imm);
+ __ Srl(out_low, out_low, 32 - ctz_imm);
+ }
+ if (is_r6) {
+ __ Selnez(out_high, AT, out_low);
+ } else {
+ __ Movz(AT, ZERO, out_low);
+ __ Move(out_high, AT);
+ }
+ if (is_r2_or_newer) {
+ __ Ins(out_low, out_high, ctz_imm, 32 - ctz_imm);
+ } else {
+ __ Sll(AT, out_high, ctz_imm);
+ __ Or(out_low, out_low, AT);
+ }
+ } else if (ctz_imm == 32) {
+ __ Sra(AT, in_high, 31);
+ __ Move(out_low, in_low);
+ if (is_r6) {
+ __ Selnez(out_high, AT, out_low);
+ } else {
+ __ Movz(AT, ZERO, out_low);
+ __ Move(out_high, AT);
+ }
+ } else if (ctz_imm < 63) {
+ __ Sra(AT, in_high, 31);
+ __ Move(TMP, in_low);
+ if (ctz_imm - 32 <= 16) {
+ __ Andi(out_high, in_high, (1 << (ctz_imm - 32)) - 1);
+ } else if (is_r2_or_newer) {
+ __ Ext(out_high, in_high, 0, ctz_imm - 32);
+ } else {
+ __ Sll(out_high, in_high, 64 - ctz_imm);
+ __ Srl(out_high, out_high, 64 - ctz_imm);
+ }
+ __ Move(out_low, TMP);
+ __ Or(TMP, TMP, out_high);
+ if (is_r6) {
+ __ Selnez(AT, AT, TMP);
+ } else {
+ __ Movz(AT, ZERO, TMP);
+ }
+ if (is_r2_or_newer) {
+ __ Ins(out_high, AT, ctz_imm - 32, 64 - ctz_imm);
+ } else {
+ __ Sll(AT, AT, ctz_imm - 32);
+ __ Or(out_high, out_high, AT);
+ }
+ } else {
+ if (is_r6) {
+ __ Aui(AT, in_high, 0x8000);
+ } else {
+ __ Lui(AT, 0x8000);
+ __ Xor(AT, AT, in_high);
+ }
+ __ Or(AT, AT, in_low);
+ __ Sltiu(AT, AT, 1);
+ __ Sll(AT, AT, 31);
+ __ Move(out_low, in_low);
+ __ Xor(out_high, in_high, AT);
+ }
@@ -3935,7 +4109,16 @@
void LocationsBuilderMIPS::VisitDiv(HDiv* div) {
DataType::Type type = div->GetResultType();
- LocationSummary::CallKind call_kind = (type == DataType::Type::kInt64)
+ bool call_long_div = false;
+ if (type == DataType::Type::kInt64) {
+ if (div->InputAt(1)->IsConstant()) {
+ int64_t imm = CodeGenerator::GetInt64ValueOf(div->InputAt(1)->AsConstant());
+ call_long_div = (imm != 0) && !IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm)));
+ } else {
+ call_long_div = true;
+ }
+ }
+ LocationSummary::CallKind call_kind = call_long_div
? LocationSummary::kCallOnMainOnly
: LocationSummary::kNoCall;
@@ -3949,12 +4132,18 @@
case DataType::Type::kInt64: {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(0, Location::RegisterPairLocation(
- calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
- locations->SetInAt(1, Location::RegisterPairLocation(
- calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
- locations->SetOut(calling_convention.GetReturnLocation(type));
+ if (call_long_div) {
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterPairLocation(
+ calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(1, Location::RegisterPairLocation(
+ calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+ locations->SetOut(calling_convention.GetReturnLocation(type));
+ } else {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::RequiresRegister());
+ }
@@ -3979,8 +4168,20 @@
case DataType::Type::kInt64: {
- codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
+ if (locations->InAt(1).IsConstant()) {
+ int64_t imm = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue();
+ if (imm == 0) {
+ // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+ } else if (imm == 1 || imm == -1) {
+ DivRemOneOrMinusOne(instruction);
+ } else {
+ DCHECK(IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm))));
+ DivRemByPowerOfTwo(instruction);
+ }
+ } else {
+ codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
+ }
case DataType::Type::kFloat32:
@@ -8561,9 +8762,16 @@
void LocationsBuilderMIPS::VisitRem(HRem* rem) {
DataType::Type type = rem->GetResultType();
- LocationSummary::CallKind call_kind = (type == DataType::Type::kInt32)
- ? LocationSummary::kNoCall
- : LocationSummary::kCallOnMainOnly;
+ bool call_rem;
+ if ((type == DataType::Type::kInt64) && rem->InputAt(1)->IsConstant()) {
+ int64_t imm = CodeGenerator::GetInt64ValueOf(rem->InputAt(1)->AsConstant());
+ call_rem = (imm != 0) && !IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm)));
+ } else {
+ call_rem = (type != DataType::Type::kInt32);
+ }
+ LocationSummary::CallKind call_kind = call_rem
+ ? LocationSummary::kCallOnMainOnly
+ : LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
switch (type) {
@@ -8574,12 +8782,18 @@
case DataType::Type::kInt64: {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(0, Location::RegisterPairLocation(
- calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
- locations->SetInAt(1, Location::RegisterPairLocation(
- calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
- locations->SetOut(calling_convention.GetReturnLocation(type));
+ if (call_rem) {
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterPairLocation(
+ calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(1, Location::RegisterPairLocation(
+ calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+ locations->SetOut(calling_convention.GetReturnLocation(type));
+ } else {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::RequiresRegister());
+ }
@@ -8599,14 +8813,27 @@
void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) {
DataType::Type type = instruction->GetType();
+ LocationSummary* locations = instruction->GetLocations();
switch (type) {
case DataType::Type::kInt32:
case DataType::Type::kInt64: {
- codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
+ if (locations->InAt(1).IsConstant()) {
+ int64_t imm = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue();
+ if (imm == 0) {
+ // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+ } else if (imm == 1 || imm == -1) {
+ DivRemOneOrMinusOne(instruction);
+ } else {
+ DCHECK(IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm))));
+ DivRemByPowerOfTwo(instruction);
+ }
+ } else {
+ codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
+ }
case DataType::Type::kFloat32: {