MIPS32: improvements in code generation (mostly 64-bit ALU ops)
Specifically:
- Use the delay slot in InvokeRuntime() for direct entry points
- Use kNoOutputOverlap wherever possible
- Improve and/or/xor/add/sub with 64-bit integer constants
- Improve 64-bit shifts by a constant amount on R2+
- More efficient load/store of 64-bit constants (especially, 0 & +0.0)
Change-Id: I86d2217c8b5b8e2a9371effc2ce38b9eec62782b
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 5dc101b..84fa0e6 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1191,17 +1191,16 @@
uint32_t dex_pc,
SlowPathCode* slow_path,
bool is_direct_entrypoint) {
+ __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
+ __ Jalr(T9);
if (is_direct_entrypoint) {
// Reserve argument space on stack (for $a0-$a3) for
// entrypoints that directly reference native implementations.
// Called function may use this space to store $a0-$a3 regs.
- __ IncreaseFrameSize(kMipsDirectEntrypointRuntimeOffset);
- }
- __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
- __ Jalr(T9);
- __ Nop();
- if (is_direct_entrypoint) {
+ __ IncreaseFrameSize(kMipsDirectEntrypointRuntimeOffset); // Single instruction in delay slot.
__ DecreaseFrameSize(kMipsDirectEntrypointRuntimeOffset);
+ } else {
+ __ Nop(); // In delay slot.
}
RecordPcInfo(instruction, dex_pc, slow_path);
}
@@ -1275,15 +1274,9 @@
}
case Primitive::kPrimLong: {
- // TODO: can 2nd param be const?
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- if (instruction->IsAdd() || instruction->IsSub()) {
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
- } else {
- DCHECK(instruction->IsAnd() || instruction->IsOr() || instruction->IsXor());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- }
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -1350,34 +1343,142 @@
}
case Primitive::kPrimLong: {
- // TODO: can 2nd param be const?
Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
Register dst_low = locations->Out().AsRegisterPairLow<Register>();
Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>();
- Register rhs_high = locations->InAt(1).AsRegisterPairHigh<Register>();
- Register rhs_low = locations->InAt(1).AsRegisterPairLow<Register>();
-
- if (instruction->IsAnd()) {
- __ And(dst_low, lhs_low, rhs_low);
- __ And(dst_high, lhs_high, rhs_high);
- } else if (instruction->IsOr()) {
- __ Or(dst_low, lhs_low, rhs_low);
- __ Or(dst_high, lhs_high, rhs_high);
- } else if (instruction->IsXor()) {
- __ Xor(dst_low, lhs_low, rhs_low);
- __ Xor(dst_high, lhs_high, rhs_high);
- } else if (instruction->IsAdd()) {
- __ Addu(dst_low, lhs_low, rhs_low);
- __ Sltu(TMP, dst_low, lhs_low);
- __ Addu(dst_high, lhs_high, rhs_high);
- __ Addu(dst_high, dst_high, TMP);
+ Location rhs_location = locations->InAt(1);
+ bool use_imm = rhs_location.IsConstant();
+ if (!use_imm) {
+ Register rhs_high = rhs_location.AsRegisterPairHigh<Register>();
+ Register rhs_low = rhs_location.AsRegisterPairLow<Register>();
+ if (instruction->IsAnd()) {
+ __ And(dst_low, lhs_low, rhs_low);
+ __ And(dst_high, lhs_high, rhs_high);
+ } else if (instruction->IsOr()) {
+ __ Or(dst_low, lhs_low, rhs_low);
+ __ Or(dst_high, lhs_high, rhs_high);
+ } else if (instruction->IsXor()) {
+ __ Xor(dst_low, lhs_low, rhs_low);
+ __ Xor(dst_high, lhs_high, rhs_high);
+ } else if (instruction->IsAdd()) {
+ if (lhs_low == rhs_low) {
+ // Special case for lhs = rhs and the sum potentially overwriting both lhs and rhs.
+ __ Slt(TMP, lhs_low, ZERO);
+ __ Addu(dst_low, lhs_low, rhs_low);
+ } else {
+ __ Addu(dst_low, lhs_low, rhs_low);
+ // If the sum overwrites rhs, lhs remains unchanged, otherwise rhs remains unchanged.
+ __ Sltu(TMP, dst_low, (dst_low == rhs_low) ? lhs_low : rhs_low);
+ }
+ __ Addu(dst_high, lhs_high, rhs_high);
+ __ Addu(dst_high, dst_high, TMP);
+ } else {
+ DCHECK(instruction->IsSub());
+ __ Sltu(TMP, lhs_low, rhs_low);
+ __ Subu(dst_low, lhs_low, rhs_low);
+ __ Subu(dst_high, lhs_high, rhs_high);
+ __ Subu(dst_high, dst_high, TMP);
+ }
} else {
- DCHECK(instruction->IsSub());
- __ Subu(dst_low, lhs_low, rhs_low);
- __ Sltu(TMP, lhs_low, dst_low);
- __ Subu(dst_high, lhs_high, rhs_high);
- __ Subu(dst_high, dst_high, TMP);
+ int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
+ if (instruction->IsOr()) {
+ uint32_t low = Low32Bits(value);
+ uint32_t high = High32Bits(value);
+ if (IsUint<16>(low)) {
+ if (dst_low != lhs_low || low != 0) {
+ __ Ori(dst_low, lhs_low, low);
+ }
+ } else {
+ __ LoadConst32(TMP, low);
+ __ Or(dst_low, lhs_low, TMP);
+ }
+ if (IsUint<16>(high)) {
+ if (dst_high != lhs_high || high != 0) {
+ __ Ori(dst_high, lhs_high, high);
+ }
+ } else {
+ if (high != low) {
+ __ LoadConst32(TMP, high);
+ }
+ __ Or(dst_high, lhs_high, TMP);
+ }
+ } else if (instruction->IsXor()) {
+ uint32_t low = Low32Bits(value);
+ uint32_t high = High32Bits(value);
+ if (IsUint<16>(low)) {
+ if (dst_low != lhs_low || low != 0) {
+ __ Xori(dst_low, lhs_low, low);
+ }
+ } else {
+ __ LoadConst32(TMP, low);
+ __ Xor(dst_low, lhs_low, TMP);
+ }
+ if (IsUint<16>(high)) {
+ if (dst_high != lhs_high || high != 0) {
+ __ Xori(dst_high, lhs_high, high);
+ }
+ } else {
+ if (high != low) {
+ __ LoadConst32(TMP, high);
+ }
+ __ Xor(dst_high, lhs_high, TMP);
+ }
+ } else if (instruction->IsAnd()) {
+ uint32_t low = Low32Bits(value);
+ uint32_t high = High32Bits(value);
+ if (IsUint<16>(low)) {
+ __ Andi(dst_low, lhs_low, low);
+ } else if (low != 0xFFFFFFFF) {
+ __ LoadConst32(TMP, low);
+ __ And(dst_low, lhs_low, TMP);
+ } else if (dst_low != lhs_low) {
+ __ Move(dst_low, lhs_low);
+ }
+ if (IsUint<16>(high)) {
+ __ Andi(dst_high, lhs_high, high);
+ } else if (high != 0xFFFFFFFF) {
+ if (high != low) {
+ __ LoadConst32(TMP, high);
+ }
+ __ And(dst_high, lhs_high, TMP);
+ } else if (dst_high != lhs_high) {
+ __ Move(dst_high, lhs_high);
+ }
+ } else {
+ if (instruction->IsSub()) {
+ value = -value;
+ } else {
+ DCHECK(instruction->IsAdd());
+ }
+ int32_t low = Low32Bits(value);
+ int32_t high = High32Bits(value);
+ if (IsInt<16>(low)) {
+ if (dst_low != lhs_low || low != 0) {
+ __ Addiu(dst_low, lhs_low, low);
+ }
+ if (low != 0) {
+ __ Sltiu(AT, dst_low, low);
+ }
+ } else {
+ __ LoadConst32(TMP, low);
+ __ Addu(dst_low, lhs_low, TMP);
+ __ Sltu(AT, dst_low, TMP);
+ }
+ if (IsInt<16>(high)) {
+ if (dst_high != lhs_high || high != 0) {
+ __ Addiu(dst_high, lhs_high, high);
+ }
+ } else {
+ if (high != low) {
+ __ LoadConst32(TMP, high);
+ }
+ __ Addu(dst_high, lhs_high, TMP);
+ }
+ if (low != 0) {
+ __ Addu(dst_high, dst_high, AT);
+ }
+ }
}
break;
}
@@ -1416,12 +1517,15 @@
Primitive::Type type = instr->GetResultType();
switch (type) {
case Primitive::kPrimInt:
- case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case Primitive::kPrimLong:
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
locations->SetOut(Location::RequiresRegister());
break;
- }
default:
LOG(FATAL) << "Unexpected shift type " << type;
}
@@ -1440,6 +1544,8 @@
int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0;
uint32_t shift_mask = (type == Primitive::kPrimInt) ? kMaxIntShiftValue : kMaxLongShiftValue;
uint32_t shift_value = rhs_imm & shift_mask;
+ // Is the INS (Insert Bit Field) instruction supported?
+ bool has_ins = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
switch (type) {
case Primitive::kPrimInt: {
@@ -1474,21 +1580,37 @@
if (shift_value == 0) {
codegen_->Move64(locations->Out(), locations->InAt(0));
} else if (shift_value < kMipsBitsPerWord) {
- if (instr->IsShl()) {
- __ Sll(dst_low, lhs_low, shift_value);
- __ Srl(TMP, lhs_low, kMipsBitsPerWord - shift_value);
- __ Sll(dst_high, lhs_high, shift_value);
- __ Or(dst_high, dst_high, TMP);
- } else if (instr->IsShr()) {
- __ Sra(dst_high, lhs_high, shift_value);
- __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
- __ Srl(dst_low, lhs_low, shift_value);
- __ Or(dst_low, dst_low, TMP);
+ if (has_ins) {
+ if (instr->IsShl()) {
+ __ Srl(dst_high, lhs_low, kMipsBitsPerWord - shift_value);
+ __ Ins(dst_high, lhs_high, shift_value, kMipsBitsPerWord - shift_value);
+ __ Sll(dst_low, lhs_low, shift_value);
+ } else if (instr->IsShr()) {
+ __ Srl(dst_low, lhs_low, shift_value);
+ __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
+ __ Sra(dst_high, lhs_high, shift_value);
+ } else {
+ __ Srl(dst_low, lhs_low, shift_value);
+ __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
+ __ Srl(dst_high, lhs_high, shift_value);
+ }
} else {
- __ Srl(dst_high, lhs_high, shift_value);
- __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
- __ Srl(dst_low, lhs_low, shift_value);
- __ Or(dst_low, dst_low, TMP);
+ if (instr->IsShl()) {
+ __ Sll(dst_low, lhs_low, shift_value);
+ __ Srl(TMP, lhs_low, kMipsBitsPerWord - shift_value);
+ __ Sll(dst_high, lhs_high, shift_value);
+ __ Or(dst_high, dst_high, TMP);
+ } else if (instr->IsShr()) {
+ __ Sra(dst_high, lhs_high, shift_value);
+ __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
+ __ Srl(dst_low, lhs_low, shift_value);
+ __ Or(dst_low, dst_low, TMP);
+ } else {
+ __ Srl(dst_high, lhs_high, shift_value);
+ __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
+ __ Srl(dst_low, lhs_low, shift_value);
+ __ Or(dst_low, dst_low, TMP);
+ }
}
} else {
shift_value -= kMipsBitsPerWord;