MIPS: Support swaps between 128-bit locations
Add support for swaps between two SIMDStackSlots, between two
VectorRegisters (128-bit extended FpuRegisters), and between a
SIMDStackSlot and a VectorRegister.
This fixes test 623-checker-loop-regressions for
MIPS64R6 and MIPS32R6.
Test: ./testrunner.py --optimizing --target in QEMU (MIPS64R6)
Test: ./testrunner.py --optimizing --target in QEMU (MIPS32R6)
Change-Id: I36aa209f79790fb6c08b9a171f810769a6b40afc
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 1f6b214..9f4c234 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1095,17 +1095,23 @@
__ Move(r2, r1);
__ Move(r1, TMP);
} else if (loc1.IsFpuRegister() && loc2.IsFpuRegister()) {
- FRegister f1 = loc1.AsFpuRegister<FRegister>();
- FRegister f2 = loc2.AsFpuRegister<FRegister>();
- if (type == DataType::Type::kFloat32) {
- __ MovS(FTMP, f2);
- __ MovS(f2, f1);
- __ MovS(f1, FTMP);
+ if (codegen_->GetGraph()->HasSIMD()) {
+ __ MoveV(static_cast<VectorRegister>(FTMP), VectorRegisterFrom(loc1));
+ __ MoveV(VectorRegisterFrom(loc1), VectorRegisterFrom(loc2));
+ __ MoveV(VectorRegisterFrom(loc2), static_cast<VectorRegister>(FTMP));
} else {
- DCHECK_EQ(type, DataType::Type::kFloat64);
- __ MovD(FTMP, f2);
- __ MovD(f2, f1);
- __ MovD(f1, FTMP);
+ FRegister f1 = loc1.AsFpuRegister<FRegister>();
+ FRegister f2 = loc2.AsFpuRegister<FRegister>();
+ if (type == DataType::Type::kFloat32) {
+ __ MovS(FTMP, f2);
+ __ MovS(f2, f1);
+ __ MovS(f1, FTMP);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat64);
+ __ MovD(FTMP, f2);
+ __ MovD(f2, f1);
+ __ MovD(f1, FTMP);
+ }
}
} else if ((loc1.IsRegister() && loc2.IsFpuRegister()) ||
(loc1.IsFpuRegister() && loc2.IsRegister())) {
@@ -1152,6 +1158,8 @@
Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ false);
} else if (loc1.IsDoubleStackSlot() && loc2.IsDoubleStackSlot()) {
Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ true);
+ } else if (loc1.IsSIMDStackSlot() && loc2.IsSIMDStackSlot()) {
+ ExchangeQuadSlots(loc1.GetStackIndex(), loc2.GetStackIndex());
} else if ((loc1.IsRegister() && loc2.IsStackSlot()) ||
(loc1.IsStackSlot() && loc2.IsRegister())) {
Register reg = loc1.IsRegister() ? loc1.AsRegister<Register>() : loc2.AsRegister<Register>();
@@ -1174,6 +1182,13 @@
__ Move(TMP, reg_h);
__ LoadFromOffset(kLoadWord, reg_h, SP, offset_h);
__ StoreToOffset(kStoreWord, TMP, SP, offset_h);
+ } else if ((loc1.IsFpuRegister() && loc2.IsSIMDStackSlot()) ||
+ (loc1.IsSIMDStackSlot() && loc2.IsFpuRegister())) {
+ Location fp_loc = loc1.IsFpuRegister() ? loc1 : loc2;
+ intptr_t offset = loc1.IsFpuRegister() ? loc2.GetStackIndex() : loc1.GetStackIndex();
+ __ MoveV(static_cast<VectorRegister>(FTMP), VectorRegisterFrom(fp_loc));
+ __ LoadQFromOffset(fp_loc.AsFpuRegister<FRegister>(), SP, offset);
+ __ StoreQToOffset(FTMP, SP, offset);
} else if (loc1.IsFpuRegister() || loc2.IsFpuRegister()) {
FRegister reg = loc1.IsFpuRegister() ? loc1.AsFpuRegister<FRegister>()
: loc2.AsFpuRegister<FRegister>();
@@ -1225,6 +1240,13 @@
}
}
+void ParallelMoveResolverMIPS::ExchangeQuadSlots(int index1, int index2) {
+ __ LoadQFromOffset(FTMP, SP, index1);  // FTMP <- quad slot at SP + index1.
+ __ LoadQFromOffset(FTMP2, SP, index2);  // FTMP2 <- quad slot at SP + index2.
+ __ StoreQToOffset(FTMP, SP, index2);  // Slot 2 receives the old slot 1 value.
+ __ StoreQToOffset(FTMP2, SP, index1);  // Slot 1 receives the old slot 2 value.
+}  // Both scratch registers, FTMP and FTMP2, are overwritten by this swap.
+
void CodeGeneratorMIPS::ComputeSpillMask() {
core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
@@ -1790,6 +1812,11 @@
blocked_core_registers_[TMP] = true;
blocked_fpu_registers_[FTMP] = true;
+ if (GetInstructionSetFeatures().HasMsa()) {
+ // To be used just for MSA instructions.
+ blocked_fpu_registers_[FTMP2] = true;
+ }
+
// Reserve suspend and thread registers.
blocked_core_registers_[S0] = true;
blocked_core_registers_[TR] = true;
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 7845e31..cf8e7a3 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -145,6 +145,7 @@
void RestoreScratch(int reg) OVERRIDE;
void Exchange(int index1, int index2, bool double_slot);
+ void ExchangeQuadSlots(int index1, int index2);
MipsAssembler* GetAssembler() const;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 22989c8..eb64f1b 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1061,6 +1061,13 @@
__ StoreToOffset(store_type, TMP, SP, index1 + stack_offset);
}
+void ParallelMoveResolverMIPS64::ExchangeQuadSlots(int index1, int index2) {
+ __ LoadFpuFromOffset(kLoadQuadword, FTMP, SP, index1);  // FTMP <- quad slot at SP + index1.
+ __ LoadFpuFromOffset(kLoadQuadword, FTMP2, SP, index2);  // FTMP2 <- quad slot at SP + index2.
+ __ StoreFpuToOffset(kStoreQuadword, FTMP, SP, index2);  // Slot 2 receives the old slot 1 value.
+ __ StoreFpuToOffset(kStoreQuadword, FTMP2, SP, index1);  // Slot 1 receives the old slot 2 value.
+}  // Both scratch registers, FTMP and FTMP2, are overwritten by this swap.
+
static dwarf::Reg DWARFReg(GpuRegister reg) {
return dwarf::Reg::Mips64Core(static_cast<int>(reg));
}
@@ -1370,6 +1377,8 @@
bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot();
bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot();
+ bool is_simd1 = loc1.IsSIMDStackSlot();
+ bool is_simd2 = loc2.IsSIMDStackSlot();
bool is_fp_reg1 = loc1.IsFpuRegister();
bool is_fp_reg2 = loc2.IsFpuRegister();
@@ -1382,17 +1391,23 @@
__ Move(r1, TMP);
} else if (is_fp_reg2 && is_fp_reg1) {
// Swap 2 FPRs
- FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>();
- FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>();
- if (type == DataType::Type::kFloat32) {
- __ MovS(FTMP, r1);
- __ MovS(r1, r2);
- __ MovS(r2, FTMP);
+ if (GetGraph()->HasSIMD()) {
+ __ MoveV(static_cast<VectorRegister>(FTMP), VectorRegisterFrom(loc1));
+ __ MoveV(VectorRegisterFrom(loc1), VectorRegisterFrom(loc2));
+ __ MoveV(VectorRegisterFrom(loc2), static_cast<VectorRegister>(FTMP));
} else {
- DCHECK_EQ(type, DataType::Type::kFloat64);
- __ MovD(FTMP, r1);
- __ MovD(r1, r2);
- __ MovD(r2, FTMP);
+ FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>();
+ FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>();
+ if (type == DataType::Type::kFloat32) {
+ __ MovS(FTMP, r1);
+ __ MovS(r1, r2);
+ __ MovS(r2, FTMP);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat64);
+ __ MovD(FTMP, r1);
+ __ MovD(r1, r2);
+ __ MovD(r2, FTMP);
+ }
}
} else if (is_slot1 != is_slot2) {
// Swap GPR/FPR and stack slot
@@ -1421,6 +1436,17 @@
move_resolver_.Exchange(loc1.GetStackIndex(),
loc2.GetStackIndex(),
loc1.IsDoubleStackSlot());
+ } else if (is_simd1 && is_simd2) {
+ move_resolver_.ExchangeQuadSlots(loc1.GetStackIndex(), loc2.GetStackIndex());
+ } else if ((is_fp_reg1 && is_simd2) || (is_fp_reg2 && is_simd1)) {
+ Location fp_reg_loc = is_fp_reg1 ? loc1 : loc2;
+ Location mem_loc = is_fp_reg1 ? loc2 : loc1;
+ __ LoadFpuFromOffset(kLoadQuadword, FTMP, SP, mem_loc.GetStackIndex());
+ __ StoreFpuToOffset(kStoreQuadword,
+ fp_reg_loc.AsFpuRegister<FpuRegister>(),
+ SP,
+ mem_loc.GetStackIndex());
+ __ MoveV(VectorRegisterFrom(fp_reg_loc), static_cast<VectorRegister>(FTMP));
} else {
LOG(FATAL) << "Unimplemented swap between locations " << loc1 << " and " << loc2;
}
@@ -1653,6 +1679,11 @@
blocked_core_registers_[TMP2] = true;
blocked_fpu_registers_[FTMP] = true;
+ if (GetInstructionSetFeatures().HasMsa()) {
+ // To be used just for MSA instructions.
+ blocked_fpu_registers_[FTMP2] = true;
+ }
+
// Reserve suspend and thread registers.
blocked_core_registers_[S0] = true;
blocked_core_registers_[TR] = true;
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 2a95b37..d479410 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -142,6 +142,7 @@
void RestoreScratch(int reg) OVERRIDE;
void Exchange(int index1, int index2, bool double_slot);
+ void ExchangeQuadSlots(int index1, int index2);
Mips64Assembler* GetAssembler() const;
diff --git a/runtime/arch/mips/registers_mips.h b/runtime/arch/mips/registers_mips.h
index f500b58..c7f9a3e 100644
--- a/runtime/arch/mips/registers_mips.h
+++ b/runtime/arch/mips/registers_mips.h
@@ -101,7 +101,8 @@
F29 = 29,
F30 = 30,
F31 = 31,
- FTMP = F6, // scratch register
+ FTMP = F6, // scratch register
+ FTMP2 = F7, // scratch register (in addition to FTMP, reserved for MSA instructions)
kNumberOfFRegisters = 32,
kNoFRegister = -1,
};
diff --git a/runtime/arch/mips64/registers_mips64.h b/runtime/arch/mips64/registers_mips64.h
index bca260a..d3a24b6 100644
--- a/runtime/arch/mips64/registers_mips64.h
+++ b/runtime/arch/mips64/registers_mips64.h
@@ -102,7 +102,8 @@
F29 = 29,
F30 = 30,
F31 = 31,
- FTMP = F8, // scratch register
+ FTMP = F8, // scratch register
+ FTMP2 = F9, // scratch register (in addition to FTMP, reserved for MSA instructions)
kNumberOfFpuRegisters = 32,
kNoFpuRegister = -1,
};