MIPS64: small improvements in code generation
Specifically:
- More efficient load/store of constant 0 (and +0.0); see the sketch after this list
- Improved swapping of floats/doubles in registers
- Use kNoOutputOverlap wherever possible
- More efficient 64-bit integer comparison with 0
- More efficient load of integer constants of the form (2**n)-1
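
The first item rests on a simple fact worth spelling out (a stand-alone sketch, not
ART code): routing constant zero through the ZERO register is valid for +0.0 because
its IEEE-754 encoding is an all-zero bit pattern, so mtc1/dmtc1 from ZERO materializes
it directly, whereas -0.0 has the sign bit set and still needs a real constant load.

  // Stand-alone sketch (not ART code): +0.0f/+0.0 have all-zero encodings,
  // so they can come straight from the ZERO GPR; -0.0 cannot.
  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    float pos_zero_f = +0.0f, neg_zero_f = -0.0f;
    double pos_zero_d = +0.0;
    uint32_t bits32;
    uint64_t bits64;
    std::memcpy(&bits32, &pos_zero_f, sizeof(bits32));
    assert(bits32 == 0u);                    // +0.0f <- mtc1 zero, fN
    std::memcpy(&bits64, &pos_zero_d, sizeof(bits64));
    assert(bits64 == 0u);                    // +0.0  <- dmtc1 zero, fN
    std::memcpy(&bits32, &neg_zero_f, sizeof(bits32));
    assert(bits32 == UINT32_C(0x80000000));  // -0.0f still needs a load
    return 0;
  }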
Change-Id: Ic2914d8865aa6616b9a0b21b3cc173d4477eb8c7
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index d4fcaf9..0935262 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -666,9 +666,19 @@
gpr = destination.AsRegister<GpuRegister>();
}
if (dst_type == Primitive::kPrimInt || dst_type == Primitive::kPrimFloat) {
- __ LoadConst32(gpr, GetInt32ValueOf(source.GetConstant()->AsConstant()));
+ int32_t value = GetInt32ValueOf(source.GetConstant()->AsConstant());
+ if (Primitive::IsFloatingPointType(dst_type) && value == 0) {
+ gpr = ZERO;
+ } else {
+ __ LoadConst32(gpr, value);
+ }
} else {
- __ LoadConst64(gpr, GetInt64ValueOf(source.GetConstant()->AsConstant()));
+ int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant());
+ if (Primitive::IsFloatingPointType(dst_type) && value == 0) {
+ gpr = ZERO;
+ } else {
+ __ LoadConst64(gpr, value);
+ }
}
if (dst_type == Primitive::kPrimFloat) {
__ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>());
@@ -734,12 +744,22 @@
// Move to stack from constant
HConstant* src_cst = source.GetConstant();
StoreOperandType store_type = destination.IsStackSlot() ? kStoreWord : kStoreDoubleword;
+ GpuRegister gpr = ZERO;
if (destination.IsStackSlot()) {
- __ LoadConst32(TMP, GetInt32ValueOf(src_cst->AsConstant()));
+ int32_t value = GetInt32ValueOf(src_cst->AsConstant());
+ if (value != 0) {
+ gpr = TMP;
+ __ LoadConst32(gpr, value);
+ }
} else {
- __ LoadConst64(TMP, GetInt64ValueOf(src_cst->AsConstant()));
+ DCHECK(destination.IsDoubleStackSlot());
+ int64_t value = GetInt64ValueOf(src_cst->AsConstant());
+ if (value != 0) {
+ gpr = TMP;
+ __ LoadConst64(gpr, value);
+ }
}
- __ StoreToOffset(store_type, TMP, SP, destination.GetStackIndex());
+ __ StoreToOffset(store_type, gpr, SP, destination.GetStackIndex());
} else {
DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot());
@@ -755,9 +775,7 @@
}
}
-void CodeGeneratorMIPS64::SwapLocations(Location loc1,
- Location loc2,
- Primitive::Type type ATTRIBUTE_UNUSED) {
+void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, Primitive::Type type) {
DCHECK(!loc1.IsConstant());
DCHECK(!loc2.IsConstant());
@@ -781,12 +799,16 @@
// Swap 2 FPRs
FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>();
FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>();
- // TODO: Can MOV.S/MOV.D be used here to save one instruction?
- // Need to distinguish float from double, right?
- __ Dmfc1(TMP, r2);
- __ Dmfc1(AT, r1);
- __ Dmtc1(TMP, r1);
- __ Dmtc1(AT, r2);
+ if (type == Primitive::kPrimFloat) {
+ __ MovS(FTMP, r1);
+ __ MovS(r1, r2);
+ __ MovS(r2, FTMP);
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ __ MovD(FTMP, r1);
+ __ MovD(r1, r2);
+ __ MovD(r2, FTMP);
+ }
} else if (is_slot1 != is_slot2) {
// Swap GPR/FPR and stack slot
Location reg_loc = is_slot1 ? loc2 : loc1;
@@ -800,7 +822,6 @@
reg_loc.AsFpuRegister<FpuRegister>(),
SP,
mem_loc.GetStackIndex());
- // TODO: review this MTC1/DMTC1 move
if (mem_loc.IsStackSlot()) {
__ Mtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>());
} else {
@@ -845,12 +866,22 @@
} else {
DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
// Move to stack from constant
+ GpuRegister gpr = ZERO;
if (location.IsStackSlot()) {
- __ LoadConst32(TMP, GetInt32ValueOf(instruction->AsConstant()));
- __ StoreToOffset(kStoreWord, TMP, SP, location.GetStackIndex());
+ int32_t value = GetInt32ValueOf(instruction->AsConstant());
+ if (value != 0) {
+ gpr = TMP;
+ __ LoadConst32(gpr, value);
+ }
+ __ StoreToOffset(kStoreWord, gpr, SP, location.GetStackIndex());
} else {
- __ LoadConst64(TMP, instruction->AsLongConstant()->GetValue());
- __ StoreToOffset(kStoreDoubleword, TMP, SP, location.GetStackIndex());
+ DCHECK(location.IsDoubleStackSlot());
+ int64_t value = instruction->AsLongConstant()->GetValue();
+ if (value != 0) {
+ gpr = TMP;
+ __ LoadConst64(gpr, value);
+ }
+ __ StoreToOffset(kStoreDoubleword, gpr, SP, location.GetStackIndex());
}
}
} else if (instruction->IsTemporary()) {
@@ -1198,7 +1229,7 @@
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
- locations->SetOut(Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
default:
@@ -1707,7 +1738,7 @@
switch (in_type) {
case Primitive::kPrimLong:
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1)));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
@@ -1736,8 +1767,18 @@
case Primitive::kPrimLong: {
GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
- GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
- // TODO: more efficient (direct) comparison with a constant
+ Location rhs_location = locations->InAt(1);
+ bool use_imm = rhs_location.IsConstant();
+ GpuRegister rhs = ZERO;
+ if (use_imm) {
+ int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
+ if (value != 0) {
+ rhs = AT;
+ __ LoadConst64(rhs, value);
+ }
+ } else {
+ rhs = rhs_location.AsRegister<GpuRegister>();
+ }
__ Slt(TMP, lhs, rhs);
__ Slt(dst, rhs, lhs);
__ Subu(dst, dst, TMP);
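
For reference, here is a stand-alone sketch (not ART code; the helper name
CompareLongWithZero is purely illustrative) of what the Slt/Slt/Subu triple above
computes once the right-hand side constant is zero and rhs stays pinned to the
ZERO register, so no constant load is emitted at all:

  // slt  TMP, lhs, zero   -> TMP = (lhs < 0)
  // slt  dst, zero, lhs   -> dst = (lhs > 0)
  // subu dst, dst, TMP    -> dst = -1, 0 or +1
  #include <cassert>
  #include <cstdint>

  static int32_t CompareLongWithZero(int64_t lhs) {
    int32_t tmp = lhs < 0;  // slt TMP, lhs, zero
    int32_t dst = 0 < lhs;  // slt dst, zero, lhs
    return dst - tmp;       // subu dst, dst, TMP
  }

  int main() {
    assert(CompareLongWithZero(-5) == -1);
    assert(CompareLongWithZero(0) == 0);
    assert(CompareLongWithZero(42) == 1);
    return 0;
  }

Previously the long rhs always had to live in a register, so comparing with any
constant cost an extra materialization before the three-instruction sequence;
nonzero constants now go through AT inside the compare itself.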
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 00e8995..cedbedb 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -892,45 +892,58 @@
} else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
Lui(rd, value >> 16);
Dati(rd, (value >> 48) + bit31);
+ } else if (IsPowerOfTwo(value + UINT64_C(1))) {
+ int shift_cnt = 64 - CTZ(value + UINT64_C(1));
+ Daddiu(rd, ZERO, -1);
+ if (shift_cnt < 32) {
+ Dsrl(rd, rd, shift_cnt);
+ } else {
+ Dsrl32(rd, rd, shift_cnt & 31);
+ }
} else {
int shift_cnt = CTZ(value);
int64_t tmp = value >> shift_cnt;
if (IsUint<16>(tmp)) {
Ori(rd, ZERO, tmp);
- if (shift_cnt < 32)
+ if (shift_cnt < 32) {
Dsll(rd, rd, shift_cnt);
- else
+ } else {
Dsll32(rd, rd, shift_cnt & 31);
+ }
} else if (IsInt<16>(tmp)) {
Daddiu(rd, ZERO, tmp);
- if (shift_cnt < 32)
+ if (shift_cnt < 32) {
Dsll(rd, rd, shift_cnt);
- else
+ } else {
Dsll32(rd, rd, shift_cnt & 31);
+ }
} else if (IsInt<32>(tmp)) {
// Loads with 3 instructions.
Lui(rd, tmp >> 16);
Ori(rd, rd, tmp);
- if (shift_cnt < 32)
+ if (shift_cnt < 32) {
Dsll(rd, rd, shift_cnt);
- else
+ } else {
Dsll32(rd, rd, shift_cnt & 31);
+ }
} else {
shift_cnt = 16 + CTZ(value >> 16);
tmp = value >> shift_cnt;
if (IsUint<16>(tmp)) {
Ori(rd, ZERO, tmp);
- if (shift_cnt < 32)
+ if (shift_cnt < 32) {
Dsll(rd, rd, shift_cnt);
- else
+ } else {
Dsll32(rd, rd, shift_cnt & 31);
+ }
Ori(rd, rd, value);
} else if (IsInt<16>(tmp)) {
Daddiu(rd, ZERO, tmp);
- if (shift_cnt < 32)
+ if (shift_cnt < 32) {
Dsll(rd, rd, shift_cnt);
- else
+ } else {
Dsll32(rd, rd, shift_cnt & 31);
+ }
Ori(rd, rd, value);
} else {
// Loads with 3-4 instructions.
@@ -941,10 +954,11 @@
used_lui = true;
}
if ((tmp2 & 0xFFFF) != 0) {
- if (used_lui)
+ if (used_lui) {
Ori(rd, rd, tmp2);
- else
+ } else {
Ori(rd, ZERO, tmp2);
+ }
}
if (bit31) {
tmp2 += UINT64_C(0x100000000);
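
As a worked example of the new (2**n)-1 special case added to LoadConst64 above,
a stand-alone sketch (not ART code; the printed mnemonics only mirror what
Daddiu/Dsrl/Dsrl32 emit): all ones are materialized with daddiu rd, zero, -1 and
the excess high bits are shifted out with a logical right shift by 64-n, giving a
fixed two-instruction sequence.

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t value = UINT64_C(0x00FFFFFFFFFFFFFF);  // (2**56)-1, as an example
    uint64_t plus1 = value + 1;
    // value is of the form (2**n)-1 iff value+1 is a nonzero power of two.
    if (value != 0 && plus1 != 0 && (plus1 & (plus1 - 1)) == 0) {
      int shift_cnt = 64 - __builtin_ctzll(plus1);  // GCC/Clang builtin
      std::printf("daddiu rd, zero, -1\n");
      if (shift_cnt < 32) {
        std::printf("dsrl   rd, rd, %d\n", shift_cnt);
      } else {
        std::printf("dsrl32 rd, rd, %d\n", shift_cnt & 31);
      }
      // Sanity check: emulating the two-instruction sequence yields the constant.
      uint64_t emulated = UINT64_C(0xFFFFFFFFFFFFFFFF) >> shift_cnt;
      if (emulated != value) return 1;
    }
    return 0;
  }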